summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/grep/dfa.c
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/usr.bin/grep/dfa.c')
-rw-r--r--gnu/usr.bin/grep/dfa.c88
1 files changed, 63 insertions, 25 deletions
diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c
index 64ff27d..40a926b 100644
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@@ -328,15 +328,20 @@ static reg_syntax_t syntax_bits, syntax_bits_set;
/* Flag for case-folding letters into sets. */
static int case_fold;
+/* End-of-line byte in data. */
+static unsigned char eolbyte;
+
/* Entry point to set syntax options. */
void
-dfasyntax(bits, fold)
+dfasyntax(bits, fold, eol)
reg_syntax_t bits;
int fold;
+ int eol;
{
syntax_bits_set = 1;
syntax_bits = bits;
case_fold = fold;
+ eolbyte = eol;
}
/* Lexical analyzer. All the dross that deals with the obnoxious
@@ -555,11 +560,32 @@ lex()
goto normal_char;
if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
goto normal_char;
- minrep = maxrep = 0;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+
+ if (syntax_bits & RE_NO_BK_BRACES)
+ {
+ /* Scan ahead for a valid interval; if it's not valid,
+ treat it as a literal '{'. */
+ int lo = -1, hi = -1;
+ char const *p = lexptr;
+ char const *lim = p + lexleft;
+ for (; p != lim && ISDIGIT (*p); p++)
+ lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
+ if (p != lim && *p == ',')
+ while (++p != lim && ISDIGIT (*p))
+ hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
+ else
+ hi = lo;
+ if (p == lim || *p != '}'
+ || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
+ goto normal_char;
+ }
+
+ minrep = 0;
/* Cases:
{M} - exact count
{M,} - minimum count, maximum is infinity
- {,M} - 0 through M
{M,N} - M through N */
FETCH(c, _("unfinished repeat count"));
if (ISDIGIT(c))
@@ -573,16 +599,27 @@ lex()
minrep = 10 * minrep + c - '0';
}
}
- else if (c != ',')
+ else
dfaerror(_("malformed repeat count"));
if (c == ',')
- for (;;)
- {
- FETCH(c, _("unfinished repeat count"));
- if (!ISDIGIT(c))
- break;
- maxrep = 10 * maxrep + c - '0';
- }
+ {
+ FETCH (c, _("unfinished repeat count"));
+ if (! ISDIGIT (c))
+ maxrep = -1;
+ else
+ {
+ maxrep = c - '0';
+ for (;;)
+ {
+ FETCH (c, _("unfinished repeat count"));
+ if (! ISDIGIT (c))
+ break;
+ maxrep = 10 * maxrep + c - '0';
+ }
+ if (0 <= maxrep && maxrep < minrep)
+ dfaerror (_("malformed repeat count"));
+ }
+ }
else
maxrep = minrep;
if (!(syntax_bits & RE_NO_BK_BRACES))
@@ -634,7 +671,7 @@ lex()
zeroset(ccl);
notset(ccl);
if (!(syntax_bits & RE_DOT_NEWLINE))
- clrbit('\n', ccl);
+ clrbit(eolbyte, ccl);
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit('\0', ccl);
laststart = 0;
@@ -732,7 +769,7 @@ lex()
{
notset(ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
- clrbit('\n', ccl);
+ clrbit(eolbyte, ccl);
}
laststart = 0;
return lasttok = CSET + charclass_index(ccl);
@@ -898,7 +935,7 @@ closure()
{
ntokens = nsubtoks(dfa->tindex);
tindex = dfa->tindex - ntokens;
- if (maxrep == 0)
+ if (maxrep < 0)
addtok(PLUS);
if (minrep == 0)
addtok(QMARK);
@@ -1561,7 +1598,7 @@ dfastate(s, d, trans)
for (i = 0; i < NOTCHAR; ++i)
if (IS_WORD_CONSTITUENT(i))
setbit(i, letters);
- setbit('\n', newline);
+ setbit(eolbyte, newline);
}
zeroset(matches);
@@ -1582,7 +1619,7 @@ dfastate(s, d, trans)
{
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 1))
- clrbit('\n', matches);
+ clrbit(eolbyte, matches);
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 0))
for (j = 0; j < CHARCLASS_INTS; ++j)
@@ -1693,7 +1730,7 @@ dfastate(s, d, trans)
state_letter = state;
for (i = 0; i < NOTCHAR; ++i)
trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
- trans['\n'] = state_newline;
+ trans[eolbyte] = state_newline;
}
else
for (i = 0; i < NOTCHAR; ++i)
@@ -1717,7 +1754,7 @@ dfastate(s, d, trans)
/* Find out if the new state will want any context information. */
wants_newline = 0;
- if (tstbit('\n', labels[i]))
+ if (tstbit(eolbyte, labels[i]))
for (j = 0; j < follows.nelem; ++j)
if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
wants_newline = 1;
@@ -1749,7 +1786,7 @@ dfastate(s, d, trans)
{
int c = j * INTBITS + k;
- if (c == '\n')
+ if (c == eolbyte)
trans[c] = state_newline;
else if (IS_WORD_CONSTITUENT(c))
trans[c] = state_letter;
@@ -1840,8 +1877,8 @@ build_state(s, d)
/* Keep the newline transition in a special place so we can use it as
a sentinel. */
- d->newlines[s] = trans['\n'];
- trans['\n'] = -1;
+ d->newlines[s] = trans[eolbyte];
+ trans[eolbyte] = -1;
if (ACCEPTING(s, *d))
d->fails[s] = trans;
@@ -1889,6 +1926,7 @@ dfaexec(d, begin, end, newline, count, backref)
register unsigned char *p; /* Current input character. */
register int **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
+ register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
static int sbit[NOTCHAR]; /* Table for anding with d->success. */
static int sbit_init;
@@ -1899,7 +1937,7 @@ dfaexec(d, begin, end, newline, count, backref)
sbit_init = 1;
for (i = 0; i < NOTCHAR; ++i)
sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
- sbit['\n'] = 4;
+ sbit[eol] = 4;
}
if (! d->tralloc)
@@ -1908,7 +1946,7 @@ dfaexec(d, begin, end, newline, count, backref)
s = s1 = 0;
p = (unsigned char *) begin;
trans = d->trans;
- *end = '\n';
+ *end = eol;
for (;;)
{
@@ -1936,7 +1974,7 @@ dfaexec(d, begin, end, newline, count, backref)
}
/* If the previous character was a newline, count it. */
- if (count && (char *) p <= end && p[-1] == '\n')
+ if (count && (char *) p <= end && p[-1] == eol)
++*count;
/* Check if we've run off the end of the buffer. */
@@ -1950,7 +1988,7 @@ dfaexec(d, begin, end, newline, count, backref)
continue;
}
- if (p[-1] == '\n' && newline)
+ if (p[-1] == eol && newline)
{
s = d->newlines[s1];
continue;
OpenPOWER on IntegriCloud