summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--gnu/usr.bin/grep/dfa.c285
1 files changed, 93 insertions, 192 deletions
diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c
index 1fcf549..76dd0b2 100644
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@@ -79,6 +79,16 @@ extern void free();
#define ISCNTRL(C) (isascii(C) && iscntrl(C))
#endif
+/* ISASCIIDIGIT differs from ISDIGIT, as follows:
+ - Its arg may be any int or unsigned int; it need not be an unsigned char.
+ - It's guaranteed to evaluate its argument exactly once.
+ - It's typically faster.
+ Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
+ only '0' through '9' are digits. Prefer ISASCIIDIGIT to ISDIGIT unless
+ it's important to use the locale's definition of `digit' even when the
+ host does not conform to Posix. */
+#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
+
/* If we (don't) have I18N. */
/* glibc defines _ */
#ifndef _
@@ -150,29 +160,8 @@ static char **comsubs PARAMS ((char *left, char *right));
static char **addlists PARAMS ((char **old, char **new));
static char **inboth PARAMS ((char **left, char **right));
-#ifdef __FreeBSD__
-static int
-collate_range_cmp(c1, c2)
- int c1, c2;
-{
- static char s1[2], s2[2];
- int r;
-
- if (c1 == c2)
- return 0;
- s1[0] = c1;
- s2[0] = c2;
- if ((r = strcoll(s1, s2)) == 0)
- r = c1 - c2;
-
- return r;
-}
-#endif
-
static ptr_t
-xcalloc(n, s)
- size_t n;
- size_t s;
+xcalloc (size_t n, size_t s)
{
ptr_t r = calloc(n, s);
@@ -182,8 +171,7 @@ xcalloc(n, s)
}
static ptr_t
-xmalloc(n)
- size_t n;
+xmalloc (size_t n)
{
ptr_t r = malloc(n);
@@ -194,9 +182,7 @@ xmalloc(n)
}
static ptr_t
-xrealloc(p, n)
- ptr_t p;
- size_t n;
+xrealloc (ptr_t p, size_t n)
{
ptr_t r = realloc(p, n);
@@ -222,8 +208,7 @@ xrealloc(p, n)
#ifdef DEBUG
static void
-prtok(t)
- token t;
+prtok (token t)
{
char *s;
@@ -261,33 +246,25 @@ prtok(t)
/* Stuff pertaining to charclasses. */
static int
-tstbit(b, c)
- int b;
- charclass c;
+tstbit (int b, charclass c)
{
return c[b / INTBITS] & 1 << b % INTBITS;
}
static void
-setbit(b, c)
- int b;
- charclass c;
+setbit (int b, charclass c)
{
c[b / INTBITS] |= 1 << b % INTBITS;
}
static void
-clrbit(b, c)
- int b;
- charclass c;
+clrbit (int b, charclass c)
{
c[b / INTBITS] &= ~(1 << b % INTBITS);
}
static void
-copyset(src, dst)
- charclass src;
- charclass dst;
+copyset (charclass src, charclass dst)
{
int i;
@@ -296,8 +273,7 @@ copyset(src, dst)
}
static void
-zeroset(s)
- charclass s;
+zeroset (charclass s)
{
int i;
@@ -306,8 +282,7 @@ zeroset(s)
}
static void
-notset(s)
- charclass s;
+notset (charclass s)
{
int i;
@@ -316,9 +291,7 @@ notset(s)
}
static int
-equal(s1, s2)
- charclass s1;
- charclass s2;
+equal (charclass s1, charclass s2)
{
int i;
@@ -333,8 +306,7 @@ static struct dfa *dfa;
/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
static int
-charclass_index(s)
- charclass s;
+charclass_index (charclass s)
{
int i;
@@ -358,10 +330,7 @@ static unsigned char eolbyte;
/* Entry point to set syntax options. */
void
-dfasyntax(bits, fold, eol)
- reg_syntax_t bits;
- int fold;
- int eol;
+dfasyntax (reg_syntax_t bits, int fold, int eol)
{
syntax_bits_set = 1;
syntax_bits = bits;
@@ -387,10 +356,12 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */
#define FETCH(c, eoferr) \
{ \
if (! lexleft) \
- if (eoferr != 0) \
- dfaerror(eoferr); \
- else \
- return lasttok = END; \
+ { \
+ if (eoferr != 0) \
+ dfaerror (eoferr); \
+ else \
+ return lasttok = END; \
+ } \
(c) = (unsigned char) *lexptr++; \
--lexleft; \
}
@@ -413,8 +384,8 @@ FUNC(is_print, ISPRINT)
FUNC(is_graph, ISGRAPH)
FUNC(is_cntrl, ISCNTRL)
-static int is_blank(c)
-int c;
+static int
+is_blank (int c)
{
return (c == ' ' || c == '\t');
}
@@ -445,8 +416,7 @@ static struct {
#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
static int
-looking_at(s)
- const char *s;
+looking_at (char const *s)
{
size_t len;
@@ -457,12 +427,14 @@ looking_at(s)
}
static token
-lex()
+lex (void)
{
token c, c1, c2;
int backslash = 0, invert;
charclass ccl;
int i;
+ char lo[2];
+ char hi[2];
/* Basic plan: We fetch a character. If it's a backslash,
we set the backslash flag and go through the loop again.
@@ -595,10 +567,10 @@ lex()
int lo = -1, hi = -1;
char const *p = lexptr;
char const *lim = p + lexleft;
- for (; p != lim && ISDIGIT (*p); p++)
+ for (; p != lim && ISASCIIDIGIT (*p); p++)
lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
if (p != lim && *p == ',')
- while (++p != lim && ISDIGIT (*p))
+ while (++p != lim && ISASCIIDIGIT (*p))
hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
else
hi = lo;
@@ -613,13 +585,13 @@ lex()
{M,} - minimum count, maximum is infinity
{M,N} - M through N */
FETCH(c, _("unfinished repeat count"));
- if (ISDIGIT(c))
+ if (ISASCIIDIGIT (c))
{
minrep = c - '0';
for (;;)
{
FETCH(c, _("unfinished repeat count"));
- if (!ISDIGIT(c))
+ if (! ISASCIIDIGIT (c))
break;
minrep = 10 * minrep + c - '0';
}
@@ -629,7 +601,7 @@ lex()
if (c == ',')
{
FETCH (c, _("unfinished repeat count"));
- if (! ISDIGIT (c))
+ if (! ISASCIIDIGIT (c))
maxrep = -1;
else
{
@@ -637,7 +609,7 @@ lex()
for (;;)
{
FETCH (c, _("unfinished repeat count"));
- if (! ISDIGIT (c))
+ if (! ISASCIIDIGIT (c))
break;
maxrep = 10 * maxrep + c - '0';
}
@@ -776,35 +748,26 @@ lex()
}
else
c2 = c;
-#ifdef __FreeBSD__
- if (collate_range_cmp(c, c2) <= 0)
- {
- token c3;
-
- for (c3 = 0; c3 < NOTCHAR; ++c3) {
- if (collate_range_cmp(c, c3) <= 0 &&
- collate_range_cmp(c3, c2) <= 0) {
- setbit(c3, ccl);
- if (case_fold)
- if (ISUPPER(c3))
- setbit(tolower(c3), ccl);
- else if (ISLOWER(c))
- setbit(toupper(c3), ccl);
- }
- }
- }
-#else
- while (c <= c2)
+
+ lo[0] = c; lo[1] = '\0';
+ hi[0] = c2; hi[1] = '\0';
+ for (c = 0; c < NOTCHAR; c++)
{
- setbit(c, ccl);
- if (case_fold)
- if (ISUPPER(c))
- setbit(tolower(c), ccl);
- else if (ISLOWER(c))
- setbit(toupper(c), ccl);
- ++c;
+ char ch[2];
+ ch[0] = c; ch[1] = '\0';
+ if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
+ {
+ setbit (c, ccl);
+ if (case_fold)
+ {
+ if (ISUPPER (c))
+ setbit (tolower (c), ccl);
+ else if (ISLOWER (c))
+ setbit (toupper (c), ccl);
+ }
+ }
}
-#endif
+
skip:
;
}
@@ -853,8 +816,7 @@ static int depth; /* Current depth of a hypothetical stack
/* Add the given token to the parse tree, maintaining the depth count and
updating the maximum depth if necessary. */
static void
-addtok(t)
- token t;
+addtok (token t)
{
REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
dfa->tokens[dfa->tindex++] = t;
@@ -913,7 +875,7 @@ addtok(t)
The parser builds a parse tree in postfix form in an array of tokens. */
static void
-atom()
+atom (void)
{
if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@@ -936,8 +898,7 @@ atom()
/* Return the number of tokens in the given subexpression. */
static int
-nsubtoks(tindex)
-int tindex;
+nsubtoks (int tindex)
{
int ntoks1;
@@ -959,8 +920,7 @@ int tindex;
/* Copy the given subexpression to the top of the tree. */
static void
-copytoks(tindex, ntokens)
- int tindex, ntokens;
+copytoks (int tindex, int ntokens)
{
int i;
@@ -969,7 +929,7 @@ copytoks(tindex, ntokens)
}
static void
-closure()
+closure (void)
{
int tindex, ntokens, i;
@@ -1004,7 +964,7 @@ closure()
}
static void
-branch()
+branch (void)
{
closure();
while (tok != RPAREN && tok != OR && tok >= 0)
@@ -1015,8 +975,7 @@ branch()
}
static void
-regexp(toplevel)
- int toplevel;
+regexp (int toplevel)
{
branch();
while (tok == OR)
@@ -1034,11 +993,7 @@ regexp(toplevel)
length of the string, so s can include NUL characters. D is a pointer to
the struct dfa to parse into. */
void
-dfaparse(s, len, d)
- char *s;
- size_t len;
- struct dfa *d;
-
+dfaparse (char *s, size_t len, struct dfa *d)
{
dfa = d;
lexstart = lexptr = s;
@@ -1071,9 +1026,7 @@ dfaparse(s, len, d)
/* Copy one set to another; the destination must be large enough. */
static void
-copy(src, dst)
- position_set *src;
- position_set *dst;
+copy (position_set *src, position_set *dst)
{
int i;
@@ -1087,9 +1040,7 @@ copy(src, dst)
the same index then their constraints are logically or'd together.
S->elems must point to an array large enough to hold the resulting set. */
static void
-insert(p, s)
- position p;
- position_set *s;
+insert (position p, position_set *s)
{
int i;
position t1, t2;
@@ -1114,10 +1065,7 @@ insert(p, s)
/* Merge two sets of positions into a third. The result is exactly as if
the positions of both sets were inserted into an initially empty set. */
static void
-merge(s1, s2, m)
- position_set *s1;
- position_set *s2;
- position_set *m;
+merge (position_set *s1, position_set *s2, position_set *m)
{
int i = 0, j = 0;
@@ -1140,9 +1088,7 @@ merge(s1, s2, m)
/* Delete a position from a set. */
static void
-delete(p, s)
- position p;
- position_set *s;
+delete (position p, position_set *s)
{
int i;
@@ -1159,11 +1105,7 @@ delete(p, s)
state. Newline and letter tell whether we got here on a newline or
letter, respectively. */
static int
-state_index(d, s, newline, letter)
- struct dfa *d;
- position_set *s;
- int newline;
- int letter;
+state_index (struct dfa *d, position_set *s, int newline, int letter)
{
int hash = 0;
int constraint;
@@ -1228,12 +1170,8 @@ state_index(d, s, newline, letter)
that position with the elements of its follow labeled with an appropriate
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
-static void epsclosure PARAMS ((position_set *s, struct dfa *d));
-
static void
-epsclosure(s, d)
- position_set *s;
- struct dfa *d;
+epsclosure (position_set *s, struct dfa *d)
{
int i, j;
int *visited;
@@ -1345,9 +1283,7 @@ epsclosure(s, d)
scheme; the number of elements in each set deeper in the stack can be
used to determine the address of a particular set's array. */
void
-dfaanalyze(d, searchflag)
- struct dfa *d;
- int searchflag;
+dfaanalyze (struct dfa *d, int searchflag)
{
int *nullable; /* Nullable stack. */
int *nfirstpos; /* Element count stack for firstpos sets. */
@@ -1608,10 +1544,7 @@ dfaanalyze(d, searchflag)
create a new group labeled with the characters of C and insert this
position in that group. */
void
-dfastate(s, d, trans)
- int s;
- struct dfa *d;
- int trans[];
+dfastate (int s, struct dfa *d, int trans[])
{
position_set grps[NOTCHAR]; /* As many as will ever be needed. */
charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */
@@ -1853,9 +1786,7 @@ dfastate(s, d, trans)
TODO: Improve this comment, get rid of the unnecessary redundancy. */
static void
-build_state(s, d)
- int s;
- struct dfa *d;
+build_state (int s, struct dfa *d)
{
int *trans; /* The new transition table. */
int i;
@@ -1931,8 +1862,7 @@ build_state(s, d)
}
static void
-build_state_zero(d)
- struct dfa *d;
+build_state_zero (struct dfa *d)
{
d->tralloc = 1;
d->trcount = 0;
@@ -1958,13 +1888,8 @@ build_state_zero(d)
match needs to be verified by a backtracking matcher. Otherwise
we store a 0 in *backref. */
char *
-dfaexec(d, begin, end, newline, count, backref)
- struct dfa *d;
- char *begin;
- char *end;
- int newline;
- int *count;
- int *backref;
+dfaexec (struct dfa *d, char *begin, char *end,
+ int newline, int *count, int *backref)
{
register int s, s1, tmp; /* Current state. */
register unsigned char *p; /* Current input character. */
@@ -2045,8 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
/* Initialize the components of a dfa that the other routines don't
initialize for themselves. */
void
-dfainit(d)
- struct dfa *d;
+dfainit (struct dfa *d)
{
d->calloc = 1;
MALLOC(d->charclasses, charclass, d->calloc);
@@ -2064,11 +1988,7 @@ dfainit(d)
/* Parse and analyze a single string of the given length. */
void
-dfacomp(s, len, d, searchflag)
- char *s;
- size_t len;
- struct dfa *d;
- int searchflag;
+dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
{
if (case_fold) /* dummy folding in service of dfamust() */
{
@@ -2107,8 +2027,7 @@ dfacomp(s, len, d, searchflag)
/* Free the storage held by the components of a dfa. */
void
-dfafree(d)
- struct dfa *d;
+dfafree (struct dfa *d)
{
int i;
struct dfamust *dm, *ndm;
@@ -2220,9 +2139,7 @@ dfafree(d)
'psi|epsilon' is likelier)? */
static char *
-icatalloc(old, new)
- char *old;
- char *new;
+icatalloc (char *old, char *new)
{
char *result;
size_t oldsize, newsize;
@@ -2243,16 +2160,13 @@ icatalloc(old, new)
}
static char *
-icpyalloc(string)
- char *string;
+icpyalloc (char *string)
{
return icatalloc((char *) NULL, string);
}
static char *
-istrstr(lookin, lookfor)
- char *lookin;
- char *lookfor;
+istrstr (char *lookin, char *lookfor)
{
char *cp;
size_t len;
@@ -2265,16 +2179,14 @@ istrstr(lookin, lookfor)
}
static void
-ifree(cp)
- char *cp;
+ifree (char *cp)
{
if (cp != NULL)
free(cp);
}
static void
-freelist(cpp)
- char **cpp;
+freelist (char **cpp)
{
int i;
@@ -2288,10 +2200,7 @@ freelist(cpp)
}
static char **
-enlist(cpp, new, len)
- char **cpp;
- char *new;
- size_t len;
+enlist (char **cpp, char *new, size_t len)
{
int i, j;
@@ -2336,9 +2245,7 @@ enlist(cpp, new, len)
list of their distinct common substrings. Return NULL if something
seems wild. */
static char **
-comsubs(left, right)
- char *left;
- char *right;
+comsubs (char *left, char *right)
{
char **cpp;
char *lcp;
@@ -2372,9 +2279,7 @@ comsubs(left, right)
}
static char **
-addlists(old, new)
-char **old;
-char **new;
+addlists (char **old, char **new)
{
int i;
@@ -2392,9 +2297,7 @@ char **new;
/* Given two lists of substrings, return a new list giving substrings
common to both. */
static char **
-inboth(left, right)
- char **left;
- char **right;
+inboth (char **left, char **right)
{
char **both;
char **temp;
@@ -2435,16 +2338,14 @@ typedef struct
} must;
static void
-resetmust(mp)
-must *mp;
+resetmust (must *mp)
{
mp->left[0] = mp->right[0] = mp->is[0] = '\0';
freelist(mp->in);
}
static void
-dfamust(dfa)
-struct dfa *dfa;
+dfamust (struct dfa *dfa)
{
must *musts;
must *mp;
OpenPOWER on IntegriCloud