summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- gnu/usr.bin/grep/dfa.c 2677
-rw-r--r-- gnu/usr.bin/grep/dfa.h 327
-rw-r--r-- gnu/usr.bin/grep/getopt.c 654
-rw-r--r-- gnu/usr.bin/grep/getopt.h 62
-rw-r--r-- gnu/usr.bin/grep/getopt1.c 189
-rw-r--r-- gnu/usr.bin/grep/getpagesize.h 77
-rw-r--r-- gnu/usr.bin/grep/grep.c 1959
-rw-r--r-- gnu/usr.bin/grep/grep.h 35
-rw-r--r-- gnu/usr.bin/grep/kwset.c 75
-rw-r--r-- gnu/usr.bin/grep/kwset.h 34
-rw-r--r-- gnu/usr.bin/grep/obstack.c 223
-rw-r--r-- gnu/usr.bin/grep/obstack.h 241
-rw-r--r-- gnu/usr.bin/grep/savedir.c 135
-rw-r--r-- gnu/usr.bin/grep/savedir.h 15
-rw-r--r-- gnu/usr.bin/grep/search.c 109
-rw-r--r-- gnu/usr.bin/grep/stpcpy.c 52
-rw-r--r-- gnu/usr.bin/grep/system.h 188
17 files changed, 4359 insertions, 2693 deletions
diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c
index 08b383d..64ff27d 100644
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@@ -1,5 +1,5 @@
-/* dfa.c - determinisitic extended regexp routines for GNU
- Copyright (C) 1988 Free Software Foundation, Inc.
+/* dfa.c - deterministic extended regexp routines for GNU
+ Copyright (C) 1988, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,46 +13,150 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
/* Written June, 1988 by Mike Haertel
Modified July, 1988 by Arthur David Olson to assist BMG speedups */
-
-#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
-#if defined(USG) || defined(STDC_HEADERS)
+#include <sys/types.h>
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+extern char *calloc(), *malloc(), *realloc();
+extern void free();
+#endif
+
+#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
-#ifndef index
+#undef index
#define index strchr
-#endif
#else
#include <strings.h>
#endif
-#include "dfa.h"
+#ifndef DEBUG /* use the same approach as regex.c */
+#undef assert
+#define assert(e)
+#endif /* DEBUG */
+
+#ifndef isgraph
+#define isgraph(C) (isprint(C) && !isspace(C))
+#endif
-#if __STDC__
-typedef void *ptr_t;
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISALPHA(C) isalpha(C)
+#define ISUPPER(C) isupper(C)
+#define ISLOWER(C) islower(C)
+#define ISDIGIT(C) isdigit(C)
+#define ISXDIGIT(C) isxdigit(C)
+#define ISSPACE(C) isspace(C)
+#define ISPUNCT(C) ispunct(C)
+#define ISALNUM(C) isalnum(C)
+#define ISPRINT(C) isprint(C)
+#define ISGRAPH(C) isgraph(C)
+#define ISCNTRL(C) iscntrl(C)
#else
-typedef char *ptr_t;
+#define ISALPHA(C) (isascii(C) && isalpha(C))
+#define ISUPPER(C) (isascii(C) && isupper(C))
+#define ISLOWER(C) (isascii(C) && islower(C))
+#define ISDIGIT(C) (isascii(C) && isdigit(C))
+#define ISXDIGIT(C) (isascii(C) && isxdigit(C))
+#define ISSPACE(C) (isascii(C) && isspace(C))
+#define ISPUNCT(C) (isascii(C) && ispunct(C))
+#define ISALNUM(C) (isascii(C) && isalnum(C))
+#define ISPRINT(C) (isascii(C) && isprint(C))
+#define ISGRAPH(C) (isascii(C) && isgraph(C))
+#define ISCNTRL(C) (isascii(C) && iscntrl(C))
#endif
-static void regmust();
+/* If we (don't) have I18N. */
+/* glibc defines _ */
+#ifndef _
+# ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# ifndef _
+# define _(Str) gettext (Str)
+# endif
+# else
+# define _(Str) (Str)
+# endif
+#endif
+
+#include "regex.h"
+#include "dfa.h"
+
+/* HPUX, define those as macros in sys/param.h */
+#ifdef setbit
+# undef setbit
+#endif
+#ifdef clrbit
+# undef clrbit
+#endif
+
+static void dfamust PARAMS ((struct dfa *dfa));
+
+static ptr_t xcalloc PARAMS ((size_t n, size_t s));
+static ptr_t xmalloc PARAMS ((size_t n));
+static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
+#ifdef DEBUG
+static void prtok PARAMS ((token t));
+#endif
+static int tstbit PARAMS ((int b, charclass c));
+static void setbit PARAMS ((int b, charclass c));
+static void clrbit PARAMS ((int b, charclass c));
+static void copyset PARAMS ((charclass src, charclass dst));
+static void zeroset PARAMS ((charclass s));
+static void notset PARAMS ((charclass s));
+static int equal PARAMS ((charclass s1, charclass s2));
+static int charclass_index PARAMS ((charclass s));
+static int looking_at PARAMS ((const char *s));
+static token lex PARAMS ((void));
+static void addtok PARAMS ((token t));
+static void atom PARAMS ((void));
+static int nsubtoks PARAMS ((int tindex));
+static void copytoks PARAMS ((int tindex, int ntokens));
+static void closure PARAMS ((void));
+static void branch PARAMS ((void));
+static void regexp PARAMS ((int toplevel));
+static void copy PARAMS ((position_set *src, position_set *dst));
+static void insert PARAMS ((position p, position_set *s));
+static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
+static void delete PARAMS ((position p, position_set *s));
+static int state_index PARAMS ((struct dfa *d, position_set *s,
+ int newline, int letter));
+static void build_state PARAMS ((int s, struct dfa *d));
+static void build_state_zero PARAMS ((struct dfa *d));
+static char *icatalloc PARAMS ((char *old, char *new));
+static char *icpyalloc PARAMS ((char *string));
+static char *istrstr PARAMS ((char *lookin, char *lookfor));
+static void ifree PARAMS ((char *cp));
+static void freelist PARAMS ((char **cpp));
+static char **enlist PARAMS ((char **cpp, char *new, size_t len));
+static char **comsubs PARAMS ((char *left, char *right));
+static char **addlists PARAMS ((char **old, char **new));
+static char **inboth PARAMS ((char **left, char **right));
static ptr_t
xcalloc(n, s)
- int n;
+ size_t n;
size_t s;
{
ptr_t r = calloc(n, s);
if (!r)
- regerror("Memory exhausted");
+ dfaerror(_("Memory exhausted"));
return r;
}
-ptr_t /* Not static, so alloca.o can use it. */
+static ptr_t
xmalloc(n)
size_t n;
{
@@ -60,7 +164,7 @@ xmalloc(n)
assert(n != 0);
if (!r)
- regerror("Memory exhausted");
+ dfaerror(_("Memory exhausted"));
return r;
}
@@ -73,11 +177,11 @@ xrealloc(p, n)
assert(n != 0);
if (!r)
- regerror("Memory exhausted");
+ dfaerror(_("Memory exhausted"));
return r;
}
-#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))
+#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
@@ -89,54 +193,52 @@ xrealloc(p, n)
(nalloc) *= 2; \
REALLOC(p, t, nalloc); \
}
-
+
#ifdef DEBUG
-#include <stdio.h>
static void
prtok(t)
- _token t;
+ token t;
{
char *s;
if (t < 0)
fprintf(stderr, "END");
- else if (t < _NOTCHAR)
+ else if (t < NOTCHAR)
fprintf(stderr, "%c", t);
else
{
switch (t)
{
- case _EMPTY: s = "EMPTY"; break;
- case _BACKREF: s = "BACKREF"; break;
- case _BEGLINE: s = "BEGLINE"; break;
- case _ALLBEGLINE: s = "ALLBEGLINE"; break;
- case _ENDLINE: s = "ENDLINE"; break;
- case _ALLENDLINE: s = "ALLENDLINE"; break;
- case _BEGWORD: s = "BEGWORD"; break;
- case _ENDWORD: s = "ENDWORD"; break;
- case _LIMWORD: s = "LIMWORD"; break;
- case _NOTLIMWORD: s = "NOTLIMWORD"; break;
- case _QMARK: s = "QMARK"; break;
- case _STAR: s = "STAR"; break;
- case _PLUS: s = "PLUS"; break;
- case _CAT: s = "CAT"; break;
- case _OR: s = "OR"; break;
- case _LPAREN: s = "LPAREN"; break;
- case _RPAREN: s = "RPAREN"; break;
- default: s = "SET"; break;
+ case EMPTY: s = "EMPTY"; break;
+ case BACKREF: s = "BACKREF"; break;
+ case BEGLINE: s = "BEGLINE"; break;
+ case ENDLINE: s = "ENDLINE"; break;
+ case BEGWORD: s = "BEGWORD"; break;
+ case ENDWORD: s = "ENDWORD"; break;
+ case LIMWORD: s = "LIMWORD"; break;
+ case NOTLIMWORD: s = "NOTLIMWORD"; break;
+ case QMARK: s = "QMARK"; break;
+ case STAR: s = "STAR"; break;
+ case PLUS: s = "PLUS"; break;
+ case CAT: s = "CAT"; break;
+ case OR: s = "OR"; break;
+ case ORTOP: s = "ORTOP"; break;
+ case LPAREN: s = "LPAREN"; break;
+ case RPAREN: s = "RPAREN"; break;
+ default: s = "CSET"; break;
}
fprintf(stderr, "%s", s);
}
}
#endif /* DEBUG */
-/* Stuff pertaining to charsets. */
+/* Stuff pertaining to charclasses. */
static int
tstbit(b, c)
int b;
- _charset c;
+ charclass c;
{
return c[b / INTBITS] & 1 << b % INTBITS;
}
@@ -144,7 +246,7 @@ tstbit(b, c)
static void
setbit(b, c)
int b;
- _charset c;
+ charclass c;
{
c[b / INTBITS] |= 1 << b % INTBITS;
}
@@ -152,84 +254,84 @@ setbit(b, c)
static void
clrbit(b, c)
int b;
- _charset c;
+ charclass c;
{
c[b / INTBITS] &= ~(1 << b % INTBITS);
}
static void
copyset(src, dst)
- const _charset src;
- _charset dst;
+ charclass src;
+ charclass dst;
{
int i;
- for (i = 0; i < _CHARSET_INTS; ++i)
+ for (i = 0; i < CHARCLASS_INTS; ++i)
dst[i] = src[i];
}
static void
zeroset(s)
- _charset s;
+ charclass s;
{
int i;
- for (i = 0; i < _CHARSET_INTS; ++i)
+ for (i = 0; i < CHARCLASS_INTS; ++i)
s[i] = 0;
}
static void
notset(s)
- _charset s;
+ charclass s;
{
int i;
- for (i = 0; i < _CHARSET_INTS; ++i)
+ for (i = 0; i < CHARCLASS_INTS; ++i)
s[i] = ~s[i];
}
static int
equal(s1, s2)
- const _charset s1;
- const _charset s2;
+ charclass s1;
+ charclass s2;
{
int i;
- for (i = 0; i < _CHARSET_INTS; ++i)
+ for (i = 0; i < CHARCLASS_INTS; ++i)
if (s1[i] != s2[i])
return 0;
return 1;
}
-
-/* A pointer to the current regexp is kept here during parsing. */
-static struct regexp *reg;
-/* Find the index of charset s in reg->charsets, or allocate a new charset. */
+/* A pointer to the current dfa is kept here during parsing. */
+static struct dfa *dfa;
+
+/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
static int
-charset_index(s)
- const _charset s;
+charclass_index(s)
+ charclass s;
{
int i;
- for (i = 0; i < reg->cindex; ++i)
- if (equal(s, reg->charsets[i]))
+ for (i = 0; i < dfa->cindex; ++i)
+ if (equal(s, dfa->charclasses[i]))
return i;
- REALLOC_IF_NECESSARY(reg->charsets, _charset, reg->calloc, reg->cindex);
- ++reg->cindex;
- copyset(s, reg->charsets[i]);
+ REALLOC_IF_NECESSARY(dfa->charclasses, charclass, dfa->calloc, dfa->cindex);
+ ++dfa->cindex;
+ copyset(s, dfa->charclasses[i]);
return i;
}
/* Syntax bits controlling the behavior of the lexical analyzer. */
-static syntax_bits, syntax_bits_set;
+static reg_syntax_t syntax_bits, syntax_bits_set;
/* Flag for case-folding letters into sets. */
-static case_fold;
+static int case_fold;
/* Entry point to set syntax options. */
void
-regsyntax(bits, fold)
- int bits;
+dfasyntax(bits, fold)
+ reg_syntax_t bits;
int fold;
{
syntax_bits_set = 1;
@@ -237,63 +339,146 @@ regsyntax(bits, fold)
case_fold = fold;
}
-/* Lexical analyzer. */
-static const char *lexstart; /* Pointer to beginning of input string. */
-static const char *lexptr; /* Pointer to next input character. */
-static lexleft; /* Number of characters remaining. */
-static caret_allowed; /* True if backward context allows ^
- (meaningful only if RE_CONTEXT_INDEP_OPS
- is turned off). */
-static closure_allowed; /* True if backward context allows closures
- (meaningful only if RE_CONTEXT_INDEP_OPS
- is turned off). */
+/* Lexical analyzer. All the dross that deals with the obnoxious
+ GNU Regex syntax bits is located here. The poor, suffering
+ reader is referred to the GNU Regex documentation for the
+ meaning of the @#%!@#%^!@ syntax bits. */
+
+static char *lexstart; /* Pointer to beginning of input string. */
+static char *lexptr; /* Pointer to next input character. */
+static int lexleft; /* Number of characters remaining. */
+static token lasttok; /* Previous token returned; initially END. */
+static int laststart; /* True if we're separated from beginning or (, |
+ only by zero-width characters. */
+static int parens; /* Count of outstanding left parens. */
+static int minrep, maxrep; /* Repeat counts for {m,n}. */
/* Note that characters become unsigned here. */
#define FETCH(c, eoferr) \
{ \
if (! lexleft) \
- if (eoferr) \
- regerror(eoferr); \
+ if (eoferr != 0) \
+ dfaerror(eoferr); \
else \
- return _END; \
+ return lasttok = END; \
(c) = (unsigned char) *lexptr++; \
--lexleft; \
}
-static _token
+#ifdef __STDC__
+#define FUNC(F, P) static int F(int c) { return P(c); }
+#else
+#define FUNC(F, P) static int F(c) int c; { return P(c); }
+#endif
+
+FUNC(is_alpha, ISALPHA)
+FUNC(is_upper, ISUPPER)
+FUNC(is_lower, ISLOWER)
+FUNC(is_digit, ISDIGIT)
+FUNC(is_xdigit, ISXDIGIT)
+FUNC(is_space, ISSPACE)
+FUNC(is_punct, ISPUNCT)
+FUNC(is_alnum, ISALNUM)
+FUNC(is_print, ISPRINT)
+FUNC(is_graph, ISGRAPH)
+FUNC(is_cntrl, ISCNTRL)
+
+static int is_blank(c)
+int c;
+{
+ return (c == ' ' || c == '\t');
+}
+
+/* The following list maps the names of the Posix named character classes
+ to predicate functions that determine whether a given character is in
+ the class. The leading [ has already been eaten by the lexical analyzer. */
+static struct {
+ const char *name;
+ int (*pred) PARAMS ((int));
+} prednames[] = {
+ { ":alpha:]", is_alpha },
+ { ":upper:]", is_upper },
+ { ":lower:]", is_lower },
+ { ":digit:]", is_digit },
+ { ":xdigit:]", is_xdigit },
+ { ":space:]", is_space },
+ { ":punct:]", is_punct },
+ { ":alnum:]", is_alnum },
+ { ":print:]", is_print },
+ { ":graph:]", is_graph },
+ { ":cntrl:]", is_cntrl },
+ { ":blank:]", is_blank },
+ { 0 }
+};
+
+/* Return non-zero if C is a `word-constituent' byte; zero otherwise. */
+#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
+
+static int
+looking_at(s)
+ const char *s;
+{
+ size_t len;
+
+ len = strlen(s);
+ if (lexleft < len)
+ return 0;
+ return strncmp(s, lexptr, len) == 0;
+}
+
+static token
lex()
{
- _token c, c2;
- int invert;
- _charset cset;
+ token c, c1, c2;
+ int backslash = 0, invert;
+ charclass ccl;
+ int i;
- FETCH(c, (char *) 0);
- switch (c)
+ /* Basic plan: We fetch a character. If it's a backslash,
+ we set the backslash flag and go through the loop again.
+ On the plus side, this avoids having a duplicate of the
+ main switch inside the backslash case. On the minus side,
+ it means that just about every case begins with
+ "if (backslash) ...". */
+ for (i = 0; i < 2; ++i)
{
- case '^':
- if (! (syntax_bits & RE_CONTEXT_INDEP_OPS)
- && (!caret_allowed ||
- (syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart))
- goto normal_char;
- caret_allowed = 0;
- return syntax_bits & RE_TIGHT_VBAR ? _ALLBEGLINE : _BEGLINE;
-
- case '$':
- if (syntax_bits & RE_CONTEXT_INDEP_OPS || !lexleft
- || (! (syntax_bits & RE_TIGHT_VBAR)
- && ((syntax_bits & RE_NO_BK_PARENS
- ? lexleft > 0 && *lexptr == ')'
- : lexleft > 1 && *lexptr == '\\' && lexptr[1] == ')')
- || (syntax_bits & RE_NO_BK_VBAR
- ? lexleft > 0 && *lexptr == '|'
- : lexleft > 1 && *lexptr == '\\' && lexptr[1] == '|'))))
- return syntax_bits & RE_TIGHT_VBAR ? _ALLENDLINE : _ENDLINE;
- goto normal_char;
-
- case '\\':
- FETCH(c, "Unfinished \\ quote");
+ FETCH(c, 0);
switch (c)
{
+ case '\\':
+ if (backslash)
+ goto normal_char;
+ if (lexleft == 0)
+ dfaerror(_("Unfinished \\ escape"));
+ backslash = 1;
+ break;
+
+ case '^':
+ if (backslash)
+ goto normal_char;
+ if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || lasttok == END
+ || lasttok == LPAREN
+ || lasttok == OR)
+ return lasttok = BEGLINE;
+ goto normal_char;
+
+ case '$':
+ if (backslash)
+ goto normal_char;
+ if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || lexleft == 0
+ || (syntax_bits & RE_NO_BK_PARENS
+ ? lexleft > 0 && *lexptr == ')'
+ : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
+ || (syntax_bits & RE_NO_BK_VBAR
+ ? lexleft > 0 && *lexptr == '|'
+ : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
+ || ((syntax_bits & RE_NEWLINE_ALT)
+ && lexleft > 0 && *lexptr == '\n'))
+ return lasttok = ENDLINE;
+ goto normal_char;
+
case '1':
case '2':
case '3':
@@ -303,238 +488,323 @@ lex()
case '7':
case '8':
case '9':
- caret_allowed = 0;
- closure_allowed = 1;
- return _BACKREF;
+ if (backslash && !(syntax_bits & RE_NO_BK_REFS))
+ {
+ laststart = 0;
+ return lasttok = BACKREF;
+ }
+ goto normal_char;
+
+ case '`':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = BEGLINE; /* FIXME: should be beginning of string */
+ goto normal_char;
+
+ case '\'':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = ENDLINE; /* FIXME: should be end of string */
+ goto normal_char;
case '<':
- caret_allowed = 0;
- return _BEGWORD;
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = BEGWORD;
+ goto normal_char;
case '>':
- caret_allowed = 0;
- return _ENDWORD;
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = ENDWORD;
+ goto normal_char;
case 'b':
- caret_allowed = 0;
- return _LIMWORD;
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = LIMWORD;
+ goto normal_char;
case 'B':
- caret_allowed = 0;
- return _NOTLIMWORD;
-
- case 'w':
- case 'W':
- zeroset(cset);
- for (c2 = 0; c2 < _NOTCHAR; ++c2)
- if (ISALNUM(c2))
- setbit(c2, cset);
- if (c == 'W')
- notset(cset);
- caret_allowed = 0;
- closure_allowed = 1;
- return _SET + charset_index(cset);
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = NOTLIMWORD;
+ goto normal_char;
case '?':
- if (syntax_bits & RE_BK_PLUS_QM)
- goto qmark;
- goto normal_char;
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = QMARK;
+
+ case '*':
+ if (backslash)
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = STAR;
case '+':
- if (syntax_bits & RE_BK_PLUS_QM)
- goto plus;
- goto normal_char;
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = PLUS;
+
+ case '{':
+ if (!(syntax_bits & RE_INTERVALS))
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
+ goto normal_char;
+ minrep = maxrep = 0;
+ /* Cases:
+ {M} - exact count
+ {M,} - minimum count, maximum is infinity
+ {,M} - 0 through M
+ {M,N} - M through N */
+ FETCH(c, _("unfinished repeat count"));
+ if (ISDIGIT(c))
+ {
+ minrep = c - '0';
+ for (;;)
+ {
+ FETCH(c, _("unfinished repeat count"));
+ if (!ISDIGIT(c))
+ break;
+ minrep = 10 * minrep + c - '0';
+ }
+ }
+ else if (c != ',')
+ dfaerror(_("malformed repeat count"));
+ if (c == ',')
+ for (;;)
+ {
+ FETCH(c, _("unfinished repeat count"));
+ if (!ISDIGIT(c))
+ break;
+ maxrep = 10 * maxrep + c - '0';
+ }
+ else
+ maxrep = minrep;
+ if (!(syntax_bits & RE_NO_BK_BRACES))
+ {
+ if (c != '\\')
+ dfaerror(_("malformed repeat count"));
+ FETCH(c, _("unfinished repeat count"));
+ }
+ if (c != '}')
+ dfaerror(_("malformed repeat count"));
+ laststart = 0;
+ return lasttok = REPMN;
case '|':
- if (! (syntax_bits & RE_NO_BK_VBAR))
- goto or;
- goto normal_char;
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
+ goto normal_char;
+ laststart = 1;
+ return lasttok = OR;
+
+ case '\n':
+ if (syntax_bits & RE_LIMITED_OPS
+ || backslash
+ || !(syntax_bits & RE_NEWLINE_ALT))
+ goto normal_char;
+ laststart = 1;
+ return lasttok = OR;
case '(':
- if (! (syntax_bits & RE_NO_BK_PARENS))
- goto lparen;
- goto normal_char;
+ if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ goto normal_char;
+ ++parens;
+ laststart = 1;
+ return lasttok = LPAREN;
case ')':
- if (! (syntax_bits & RE_NO_BK_PARENS))
- goto rparen;
- goto normal_char;
-
- default:
- goto normal_char;
- }
+ if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ goto normal_char;
+ if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ --parens;
+ laststart = 0;
+ return lasttok = RPAREN;
+
+ case '.':
+ if (backslash)
+ goto normal_char;
+ zeroset(ccl);
+ notset(ccl);
+ if (!(syntax_bits & RE_DOT_NEWLINE))
+ clrbit('\n', ccl);
+ if (syntax_bits & RE_DOT_NOT_NULL)
+ clrbit('\0', ccl);
+ laststart = 0;
+ return lasttok = CSET + charclass_index(ccl);
- case '?':
- if (syntax_bits & RE_BK_PLUS_QM)
- goto normal_char;
- qmark:
- if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
- goto normal_char;
- return _QMARK;
-
- case '*':
- if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
- goto normal_char;
- return _STAR;
-
- case '+':
- if (syntax_bits & RE_BK_PLUS_QM)
- goto normal_char;
- plus:
- if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
- goto normal_char;
- return _PLUS;
-
- case '|':
- if (! (syntax_bits & RE_NO_BK_VBAR))
- goto normal_char;
- or:
- caret_allowed = 1;
- closure_allowed = 0;
- return _OR;
-
- case '\n':
- if (! (syntax_bits & RE_NEWLINE_OR))
- goto normal_char;
- goto or;
-
- case '(':
- if (! (syntax_bits & RE_NO_BK_PARENS))
- goto normal_char;
- lparen:
- caret_allowed = 1;
- closure_allowed = 0;
- return _LPAREN;
-
- case ')':
- if (! (syntax_bits & RE_NO_BK_PARENS))
- goto normal_char;
- rparen:
- caret_allowed = 0;
- closure_allowed = 1;
- return _RPAREN;
-
- case '.':
- zeroset(cset);
- notset(cset);
- clrbit('\n', cset);
- caret_allowed = 0;
- closure_allowed = 1;
- return _SET + charset_index(cset);
-
- case '[':
- zeroset(cset);
- FETCH(c, "Unbalanced [");
- if (c == '^')
- {
- FETCH(c, "Unbalanced [");
- invert = 1;
- }
- else
- invert = 0;
- do
- {
- FETCH(c2, "Unbalanced [");
- if (c2 == '-')
+ case 'w':
+ case 'W':
+ if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ goto normal_char;
+ zeroset(ccl);
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if (IS_WORD_CONSTITUENT(c2))
+ setbit(c2, ccl);
+ if (c == 'W')
+ notset(ccl);
+ laststart = 0;
+ return lasttok = CSET + charclass_index(ccl);
+
+ case '[':
+ if (backslash)
+ goto normal_char;
+ zeroset(ccl);
+ FETCH(c, _("Unbalanced ["));
+ if (c == '^')
+ {
+ FETCH(c, _("Unbalanced ["));
+ invert = 1;
+ }
+ else
+ invert = 0;
+ do
{
- FETCH(c2, "Unbalanced [");
+ /* Nobody ever said this had to be fast. :-)
+ Note that if we're looking at some other [:...:]
+ construct, we just treat it as a bunch of ordinary
+ characters. We can do this because we assume
+ regex has checked for syntax errors before
+ dfa is ever called. */
+ if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
+ for (c1 = 0; prednames[c1].name; ++c1)
+ if (looking_at(prednames[c1].name))
+ {
+ int (*pred)() = prednames[c1].pred;
+ if (case_fold
+ && (pred == is_upper || pred == is_lower))
+ pred = is_alpha;
+
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if ((*pred)(c2))
+ setbit(c2, ccl);
+ lexptr += strlen(prednames[c1].name);
+ lexleft -= strlen(prednames[c1].name);
+ FETCH(c1, _("Unbalanced ["));
+ goto skip;
+ }
+ if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH(c, _("Unbalanced ["));
+ FETCH(c1, _("Unbalanced ["));
+ if (c1 == '-')
+ {
+ FETCH(c2, _("Unbalanced ["));
+ if (c2 == ']')
+ {
+ /* In the case [x-], the - is an ordinary hyphen,
+ which is left in c1, the lookahead character. */
+ --lexptr;
+ ++lexleft;
+ c2 = c;
+ }
+ else
+ {
+ if (c2 == '\\'
+ && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH(c2, _("Unbalanced ["));
+ FETCH(c1, _("Unbalanced ["));
+ }
+ }
+ else
+ c2 = c;
while (c <= c2)
{
- setbit(c, cset);
+ setbit(c, ccl);
if (case_fold)
if (ISUPPER(c))
- setbit(tolower(c), cset);
+ setbit(tolower(c), ccl);
else if (ISLOWER(c))
- setbit(toupper(c), cset);
+ setbit(toupper(c), ccl);
++c;
}
- FETCH(c, "Unbalanced [");
+ skip:
+ ;
}
- else
+ while ((c = c1) != ']');
+ if (invert)
{
- setbit(c, cset);
- if (case_fold)
- if (ISUPPER(c))
- setbit(tolower(c), cset);
- else if (ISLOWER(c))
- setbit(toupper(c), cset);
- c = c2;
+ notset(ccl);
+ if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
+ clrbit('\n', ccl);
}
- }
- while (c != ']');
- if (invert)
- notset(cset);
- caret_allowed = 0;
- closure_allowed = 1;
- return _SET + charset_index(cset);
+ laststart = 0;
+ return lasttok = CSET + charclass_index(ccl);
- default:
- normal_char:
- caret_allowed = 0;
- closure_allowed = 1;
- if (case_fold && ISALPHA(c))
- {
- zeroset(cset);
- if (isupper(c))
- c = tolower(c);
- setbit(c, cset);
- setbit(toupper(c), cset);
- return _SET + charset_index(cset);
+ default:
+ normal_char:
+ laststart = 0;
+ if (case_fold && ISALPHA(c))
+ {
+ zeroset(ccl);
+ setbit(c, ccl);
+ if (isupper(c))
+ setbit(tolower(c), ccl);
+ else
+ setbit(toupper(c), ccl);
+ return lasttok = CSET + charclass_index(ccl);
+ }
+ return c;
}
- return c;
}
+
+ /* The above loop should consume at most a backslash
+ and some other character. */
+ abort();
+ return END; /* keeps pedantic compilers happy. */
}
-
+
/* Recursive descent parser for regular expressions. */
-static _token tok; /* Lookahead token. */
-static depth; /* Current depth of a hypothetical stack
+static token tok; /* Lookahead token. */
+static int depth; /* Current depth of a hypothetical stack
holding deferred productions. This is
used to determine the depth that will be
required of the real stack later on in
- reganalyze(). */
+ dfaanalyze(). */
/* Add the given token to the parse tree, maintaining the depth count and
updating the maximum depth if necessary. */
static void
addtok(t)
- _token t;
+ token t;
{
- REALLOC_IF_NECESSARY(reg->tokens, _token, reg->talloc, reg->tindex);
- reg->tokens[reg->tindex++] = t;
+ REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
+ dfa->tokens[dfa->tindex++] = t;
switch (t)
{
- case _QMARK:
- case _STAR:
- case _PLUS:
+ case QMARK:
+ case STAR:
+ case PLUS:
break;
- case _CAT:
- case _OR:
+ case CAT:
+ case OR:
+ case ORTOP:
--depth;
break;
default:
- ++reg->nleaves;
- case _EMPTY:
+ ++dfa->nleaves;
+ case EMPTY:
++depth;
break;
}
- if (depth > reg->depth)
- reg->depth = depth;
+ if (depth > dfa->depth)
+ dfa->depth = depth;
}
/* The grammar understood by the parser is as follows.
- start:
- regexp
- _ALLBEGLINE regexp
- regexp _ALLENDLINE
- _ALLBEGLINE regexp _ALLENDLINE
-
regexp:
- regexp _OR branch
+ regexp OR branch
branch
branch:
@@ -542,144 +812,187 @@ addtok(t)
closure
closure:
- closure _QMARK
- closure _STAR
- closure _PLUS
+ closure QMARK
+ closure STAR
+ closure PLUS
atom
atom:
<normal character>
- _SET
- _BACKREF
- _BEGLINE
- _ENDLINE
- _BEGWORD
- _ENDWORD
- _LIMWORD
- _NOTLIMWORD
+ CSET
+ BACKREF
+ BEGLINE
+ ENDLINE
+ BEGWORD
+ ENDWORD
+ LIMWORD
+ NOTLIMWORD
<empty>
The parser builds a parse tree in postfix form in an array of tokens. */
-#if __STDC__
-static void regexp(void);
-#else
-static void regexp();
-#endif
-
static void
atom()
{
- if (tok >= 0 && tok < _NOTCHAR || tok >= _SET || tok == _BACKREF
- || tok == _BEGLINE || tok == _ENDLINE || tok == _BEGWORD
- || tok == _ENDWORD || tok == _LIMWORD || tok == _NOTLIMWORD)
+ if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
+ || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
+ || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
addtok(tok);
tok = lex();
}
- else if (tok == _LPAREN)
+ else if (tok == LPAREN)
{
tok = lex();
- regexp();
- if (tok != _RPAREN)
- regerror("Unbalanced (");
+ regexp(0);
+ if (tok != RPAREN)
+ dfaerror(_("Unbalanced ("));
tok = lex();
}
else
- addtok(_EMPTY);
+ addtok(EMPTY);
+}
+
+/* Return the number of tokens in the given subexpression. */
+static int
+nsubtoks(tindex)
+int tindex;
+{
+ int ntoks1;
+
+ switch (dfa->tokens[tindex - 1])
+ {
+ default:
+ return 1;
+ case QMARK:
+ case STAR:
+ case PLUS:
+ return 1 + nsubtoks(tindex - 1);
+ case CAT:
+ case OR:
+ case ORTOP:
+ ntoks1 = nsubtoks(tindex - 1);
+ return 1 + ntoks1 + nsubtoks(tindex - 1 - ntoks1);
+ }
+}
+
+/* Copy the given subexpression to the top of the tree. */
+static void
+copytoks(tindex, ntokens)
+ int tindex, ntokens;
+{
+ int i;
+
+ for (i = 0; i < ntokens; ++i)
+ addtok(dfa->tokens[tindex + i]);
}
static void
closure()
{
+ int tindex, ntokens, i;
+
atom();
- while (tok == _QMARK || tok == _STAR || tok == _PLUS)
- {
- addtok(tok);
- tok = lex();
- }
+ while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
+ if (tok == REPMN)
+ {
+ ntokens = nsubtoks(dfa->tindex);
+ tindex = dfa->tindex - ntokens;
+ if (maxrep == 0)
+ addtok(PLUS);
+ if (minrep == 0)
+ addtok(QMARK);
+ for (i = 1; i < minrep; ++i)
+ {
+ copytoks(tindex, ntokens);
+ addtok(CAT);
+ }
+ for (; i < maxrep; ++i)
+ {
+ copytoks(tindex, ntokens);
+ addtok(QMARK);
+ addtok(CAT);
+ }
+ tok = lex();
+ }
+ else
+ {
+ addtok(tok);
+ tok = lex();
+ }
}
static void
branch()
{
closure();
- while (tok != _RPAREN && tok != _OR && tok != _ALLENDLINE && tok >= 0)
+ while (tok != RPAREN && tok != OR && tok >= 0)
{
closure();
- addtok(_CAT);
+ addtok(CAT);
}
}
static void
-regexp()
+regexp(toplevel)
+ int toplevel;
{
branch();
- while (tok == _OR)
+ while (tok == OR)
{
tok = lex();
branch();
- addtok(_OR);
+ if (toplevel)
+ addtok(ORTOP);
+ else
+ addtok(OR);
}
}
/* Main entry point for the parser. S is a string to be parsed, len is the
- length of the string, so s can include NUL characters. R is a pointer to
- the struct regexp to parse into. */
+ length of the string, so s can include NUL characters. D is a pointer to
+ the struct dfa to parse into. */
void
-regparse(s, len, r)
- const char *s;
+dfaparse(s, len, d)
+ char *s;
size_t len;
- struct regexp *r;
+ struct dfa *d;
+
{
- reg = r;
+ dfa = d;
lexstart = lexptr = s;
lexleft = len;
- caret_allowed = 1;
- closure_allowed = 0;
+ lasttok = END;
+ laststart = 1;
+ parens = 0;
if (! syntax_bits_set)
- regerror("No syntax specified");
+ dfaerror(_("No syntax specified"));
tok = lex();
- depth = r->depth;
-
- if (tok == _ALLBEGLINE)
- {
- addtok(_BEGLINE);
- tok = lex();
- regexp();
- addtok(_CAT);
- }
- else
- regexp();
+ depth = d->depth;
- if (tok == _ALLENDLINE)
- {
- addtok(_ENDLINE);
- addtok(_CAT);
- tok = lex();
- }
+ regexp(1);
- if (tok != _END)
- regerror("Unbalanced )");
+ if (tok != END)
+ dfaerror(_("Unbalanced )"));
- addtok(_END - r->nregexps);
- addtok(_CAT);
+ addtok(END - d->nregexps);
+ addtok(CAT);
- if (r->nregexps)
- addtok(_OR);
+ if (d->nregexps)
+ addtok(ORTOP);
- ++r->nregexps;
+ ++d->nregexps;
}
-
+
/* Some primitives for operating on sets of positions. */
/* Copy one set to another; the destination must be large enough. */
static void
copy(src, dst)
- const _position_set *src;
- _position_set *dst;
+ position_set *src;
+ position_set *dst;
{
int i;
@@ -694,14 +1007,14 @@ copy(src, dst)
S->elems must point to an array large enough to hold the resulting set. */
static void
insert(p, s)
- _position p;
- _position_set *s;
+ position p;
+ position_set *s;
{
int i;
- _position t1, t2;
+ position t1, t2;
for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i)
- ;
+ continue;
if (i < s->nelem && p.index == s->elems[i].index)
s->elems[i].constraint |= p.constraint;
else
@@ -721,9 +1034,9 @@ insert(p, s)
the positions of both sets were inserted into an initially empty set. */
static void
merge(s1, s2, m)
- _position_set *s1;
- _position_set *s2;
- _position_set *m;
+ position_set *s1;
+ position_set *s2;
+ position_set *m;
{
int i = 0, j = 0;
@@ -747,8 +1060,8 @@ merge(s1, s2, m)
/* Delete a position from a set. */
static void
delete(p, s)
- _position p;
- _position_set *s;
+ position p;
+ position_set *s;
{
int i;
@@ -759,15 +1072,15 @@ delete(p, s)
for (--s->nelem; i < s->nelem; ++i)
s->elems[i] = s->elems[i + 1];
}
-
+
/* Find the index of the state corresponding to the given position set with
the given preceding context, or create a new state if there is no such
state. Newline and letter tell whether we got here on a newline or
letter, respectively. */
static int
-state_index(r, s, newline, letter)
- struct regexp *r;
- _position_set *s;
+state_index(d, s, newline, letter)
+ struct dfa *d;
+ position_set *s;
int newline;
int letter;
{
@@ -782,75 +1095,77 @@ state_index(r, s, newline, letter)
hash ^= s->elems[i].index + s->elems[i].constraint;
/* Try to find a state that exactly matches the proposed one. */
- for (i = 0; i < r->sindex; ++i)
+ for (i = 0; i < d->sindex; ++i)
{
- if (hash != r->states[i].hash || s->nelem != r->states[i].elems.nelem
- || newline != r->states[i].newline || letter != r->states[i].letter)
+ if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
+ || newline != d->states[i].newline || letter != d->states[i].letter)
continue;
for (j = 0; j < s->nelem; ++j)
if (s->elems[j].constraint
- != r->states[i].elems.elems[j].constraint
- || s->elems[j].index != r->states[i].elems.elems[j].index)
+ != d->states[i].elems.elems[j].constraint
+ || s->elems[j].index != d->states[i].elems.elems[j].index)
break;
if (j == s->nelem)
return i;
}
/* We'll have to create a new state. */
- REALLOC_IF_NECESSARY(r->states, _dfa_state, r->salloc, r->sindex);
- r->states[i].hash = hash;
- MALLOC(r->states[i].elems.elems, _position, s->nelem);
- copy(s, &r->states[i].elems);
- r->states[i].newline = newline;
- r->states[i].letter = letter;
- r->states[i].backref = 0;
- r->states[i].constraint = 0;
- r->states[i].first_end = 0;
+ REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex);
+ d->states[i].hash = hash;
+ MALLOC(d->states[i].elems.elems, position, s->nelem);
+ copy(s, &d->states[i].elems);
+ d->states[i].newline = newline;
+ d->states[i].letter = letter;
+ d->states[i].backref = 0;
+ d->states[i].constraint = 0;
+ d->states[i].first_end = 0;
for (j = 0; j < s->nelem; ++j)
- if (r->tokens[s->elems[j].index] < 0)
+ if (d->tokens[s->elems[j].index] < 0)
{
constraint = s->elems[j].constraint;
- if (_SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
- || _SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
- || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
- || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
- r->states[i].constraint |= constraint;
- if (! r->states[i].first_end)
- r->states[i].first_end = r->tokens[s->elems[j].index];
+ if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
+ || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
+ || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
+ || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
+ d->states[i].constraint |= constraint;
+ if (! d->states[i].first_end)
+ d->states[i].first_end = d->tokens[s->elems[j].index];
}
- else if (r->tokens[s->elems[j].index] == _BACKREF)
+ else if (d->tokens[s->elems[j].index] == BACKREF)
{
- r->states[i].constraint = _NO_CONSTRAINT;
- r->states[i].backref = 1;
+ d->states[i].constraint = NO_CONSTRAINT;
+ d->states[i].backref = 1;
}
- ++r->sindex;
+ ++d->sindex;
return i;
}
-
+
/* Find the epsilon closure of a set of positions. If any position of the set
contains a symbol that matches the empty string in some context, replace
that position with the elements of its follow labeled with an appropriate
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
-void
-epsclosure(s, r)
- _position_set *s;
- struct regexp *r;
+static void epsclosure PARAMS ((position_set *s, struct dfa *d));
+
+static void
+epsclosure(s, d)
+ position_set *s;
+ struct dfa *d;
{
int i, j;
int *visited;
- _position p, old;
+ position p, old;
- MALLOC(visited, int, r->tindex);
- for (i = 0; i < r->tindex; ++i)
+ MALLOC(visited, int, d->tindex);
+ for (i = 0; i < d->tindex; ++i)
visited[i] = 0;
for (i = 0; i < s->nelem; ++i)
- if (r->tokens[s->elems[i].index] >= _NOTCHAR
- && r->tokens[s->elems[i].index] != _BACKREF
- && r->tokens[s->elems[i].index] < _SET)
+ if (d->tokens[s->elems[i].index] >= NOTCHAR
+ && d->tokens[s->elems[i].index] != BACKREF
+ && d->tokens[s->elems[i].index] < CSET)
{
old = s->elems[i];
p.constraint = old.constraint;
@@ -861,32 +1176,32 @@ epsclosure(s, r)
continue;
}
visited[old.index] = 1;
- switch (r->tokens[old.index])
+ switch (d->tokens[old.index])
{
- case _BEGLINE:
- p.constraint &= _BEGLINE_CONSTRAINT;
+ case BEGLINE:
+ p.constraint &= BEGLINE_CONSTRAINT;
break;
- case _ENDLINE:
- p.constraint &= _ENDLINE_CONSTRAINT;
+ case ENDLINE:
+ p.constraint &= ENDLINE_CONSTRAINT;
break;
- case _BEGWORD:
- p.constraint &= _BEGWORD_CONSTRAINT;
+ case BEGWORD:
+ p.constraint &= BEGWORD_CONSTRAINT;
break;
- case _ENDWORD:
- p.constraint &= _ENDWORD_CONSTRAINT;
+ case ENDWORD:
+ p.constraint &= ENDWORD_CONSTRAINT;
break;
- case _LIMWORD:
- p.constraint &= _LIMWORD_CONSTRAINT;
+ case LIMWORD:
+ p.constraint &= LIMWORD_CONSTRAINT;
break;
- case _NOTLIMWORD:
- p.constraint &= _NOTLIMWORD_CONSTRAINT;
+ case NOTLIMWORD:
+ p.constraint &= NOTLIMWORD_CONSTRAINT;
break;
default:
break;
}
- for (j = 0; j < r->follows[old.index].nelem; ++j)
+ for (j = 0; j < d->follows[old.index].nelem; ++j)
{
- p.index = r->follows[old.index].elems[j].index;
+ p.index = d->follows[old.index].elems[j].index;
insert(p, s);
}
/* Force rescan to start at the beginning. */
@@ -895,41 +1210,41 @@ epsclosure(s, r)
free(visited);
}
-
+
/* Perform bottom-up analysis on the parse tree, computing various functions.
Note that at this point, we're pretending constructs like \< are real
characters rather than constraints on what can follow them.
Nullable: A node is nullable if it is at the root of a regexp that can
match the empty string.
- * _EMPTY leaves are nullable.
+ * EMPTY leaves are nullable.
* No other leaf is nullable.
- * A _QMARK or _STAR node is nullable.
- * A _PLUS node is nullable if its argument is nullable.
- * A _CAT node is nullable if both its arguments are nullable.
- * An _OR node is nullable if either argument is nullable.
+ * A QMARK or STAR node is nullable.
+ * A PLUS node is nullable if its argument is nullable.
+ * A CAT node is nullable if both its arguments are nullable.
+ * An OR node is nullable if either argument is nullable.
Firstpos: The firstpos of a node is the set of positions (nonempty leaves)
that could correspond to the first character of a string matching the
regexp rooted at the given node.
- * _EMPTY leaves have empty firstpos.
+ * EMPTY leaves have empty firstpos.
* The firstpos of a nonempty leaf is that leaf itself.
- * The firstpos of a _QMARK, _STAR, or _PLUS node is the firstpos of its
+ * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its
argument.
- * The firstpos of a _CAT node is the firstpos of the left argument, union
+ * The firstpos of a CAT node is the firstpos of the left argument, union
the firstpos of the right if the left argument is nullable.
- * The firstpos of an _OR node is the union of firstpos of each argument.
+ * The firstpos of an OR node is the union of firstpos of each argument.
Lastpos: The lastpos of a node is the set of positions that could
correspond to the last character of a string matching the regexp at
the given node.
- * _EMPTY leaves have empty lastpos.
+ * EMPTY leaves have empty lastpos.
* The lastpos of a nonempty leaf is that leaf itself.
- * The lastpos of a _QMARK, _STAR, or _PLUS node is the lastpos of its
+ * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its
argument.
- * The lastpos of a _CAT node is the lastpos of its right argument, union
+ * The lastpos of a CAT node is the lastpos of its right argument, union
the lastpos of the left if the right argument is nullable.
- * The lastpos of an _OR node is the union of the lastpos of each argument.
+ * The lastpos of an OR node is the union of the lastpos of each argument.
Follow: The follow of a position is the set of positions that could
correspond to the character following a character matching the node in
@@ -938,9 +1253,9 @@ epsclosure(s, r)
Later, if we find that a special symbol is in a follow set, we will
replace it with the elements of its follow, labeled with an appropriate
constraint.
- * Every node in the firstpos of the argument of a _STAR or _PLUS node is in
+ * Every node in the firstpos of the argument of a STAR or PLUS node is in
the follow of every node in the lastpos.
- * Every node in the firstpos of the second argument of a _CAT node is in
+ * Every node in the firstpos of the second argument of a CAT node is in
the follow of every node in the lastpos of the first argument.
Because of the postfix representation of the parse tree, the depth-first
@@ -949,61 +1264,61 @@ epsclosure(s, r)
scheme; the number of elements in each set deeper in the stack can be
used to determine the address of a particular set's array. */
void
-reganalyze(r, searchflag)
- struct regexp *r;
+dfaanalyze(d, searchflag)
+ struct dfa *d;
int searchflag;
{
int *nullable; /* Nullable stack. */
int *nfirstpos; /* Element count stack for firstpos sets. */
- _position *firstpos; /* Array where firstpos elements are stored. */
+ position *firstpos; /* Array where firstpos elements are stored. */
int *nlastpos; /* Element count stack for lastpos sets. */
- _position *lastpos; /* Array where lastpos elements are stored. */
+ position *lastpos; /* Array where lastpos elements are stored. */
int *nalloc; /* Sizes of arrays allocated to follow sets. */
- _position_set tmp; /* Temporary set for merging sets. */
- _position_set merged; /* Result of merging sets. */
+ position_set tmp; /* Temporary set for merging sets. */
+ position_set merged; /* Result of merging sets. */
int wants_newline; /* True if some position wants newline info. */
int *o_nullable;
int *o_nfirst, *o_nlast;
- _position *o_firstpos, *o_lastpos;
+ position *o_firstpos, *o_lastpos;
int i, j;
- _position *pos;
+ position *pos;
#ifdef DEBUG
- fprintf(stderr, "reganalyze:\n");
- for (i = 0; i < r->tindex; ++i)
+ fprintf(stderr, "dfaanalyze:\n");
+ for (i = 0; i < d->tindex; ++i)
{
fprintf(stderr, " %d:", i);
- prtok(r->tokens[i]);
+ prtok(d->tokens[i]);
}
putc('\n', stderr);
#endif
- r->searchflag = searchflag;
+ d->searchflag = searchflag;
- MALLOC(nullable, int, r->depth);
+ MALLOC(nullable, int, d->depth);
o_nullable = nullable;
- MALLOC(nfirstpos, int, r->depth);
+ MALLOC(nfirstpos, int, d->depth);
o_nfirst = nfirstpos;
- MALLOC(firstpos, _position, r->nleaves);
- o_firstpos = firstpos, firstpos += r->nleaves;
- MALLOC(nlastpos, int, r->depth);
+ MALLOC(firstpos, position, d->nleaves);
+ o_firstpos = firstpos, firstpos += d->nleaves;
+ MALLOC(nlastpos, int, d->depth);
o_nlast = nlastpos;
- MALLOC(lastpos, _position, r->nleaves);
- o_lastpos = lastpos, lastpos += r->nleaves;
- MALLOC(nalloc, int, r->tindex);
- for (i = 0; i < r->tindex; ++i)
+ MALLOC(lastpos, position, d->nleaves);
+ o_lastpos = lastpos, lastpos += d->nleaves;
+ MALLOC(nalloc, int, d->tindex);
+ for (i = 0; i < d->tindex; ++i)
nalloc[i] = 0;
- MALLOC(merged.elems, _position, r->nleaves);
+ MALLOC(merged.elems, position, d->nleaves);
- CALLOC(r->follows, _position_set, r->tindex);
+ CALLOC(d->follows, position_set, d->tindex);
- for (i = 0; i < r->tindex; ++i)
+ for (i = 0; i < d->tindex; ++i)
#ifdef DEBUG
{ /* Nonsyntactic #ifdef goo... */
#endif
- switch (r->tokens[i])
+ switch (d->tokens[i])
{
- case _EMPTY:
+ case EMPTY:
/* The empty set is nullable. */
*nullable++ = 1;
@@ -1011,8 +1326,8 @@ reganalyze(r, searchflag)
*nfirstpos++ = *nlastpos++ = 0;
break;
- case _STAR:
- case _PLUS:
+ case STAR:
+ case PLUS:
/* Every element in the firstpos of the argument is in the follow
of every element in the lastpos. */
tmp.nelem = nfirstpos[-1];
@@ -1020,19 +1335,19 @@ reganalyze(r, searchflag)
pos = lastpos;
for (j = 0; j < nlastpos[-1]; ++j)
{
- merge(&tmp, &r->follows[pos[j].index], &merged);
- REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position,
+ merge(&tmp, &d->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
nalloc[pos[j].index], merged.nelem - 1);
- copy(&merged, &r->follows[pos[j].index]);
+ copy(&merged, &d->follows[pos[j].index]);
}
- case _QMARK:
- /* A _QMARK or _STAR node is automatically nullable. */
- if (r->tokens[i] != _PLUS)
+ case QMARK:
+ /* A QMARK or STAR node is automatically nullable. */
+ if (d->tokens[i] != PLUS)
nullable[-1] = 1;
break;
- case _CAT:
+ case CAT:
/* Every element in the firstpos of the second argument is in the
follow of every element in the lastpos of the first argument. */
tmp.nelem = nfirstpos[-1];
@@ -1040,13 +1355,13 @@ reganalyze(r, searchflag)
pos = lastpos + nlastpos[-1];
for (j = 0; j < nlastpos[-2]; ++j)
{
- merge(&tmp, &r->follows[pos[j].index], &merged);
- REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position,
+ merge(&tmp, &d->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
nalloc[pos[j].index], merged.nelem - 1);
- copy(&merged, &r->follows[pos[j].index]);
+ copy(&merged, &d->follows[pos[j].index]);
}
- /* The firstpos of a _CAT node is the firstpos of the first argument,
+ /* The firstpos of a CAT node is the firstpos of the first argument,
union that of the second argument if the first is nullable. */
if (nullable[-2])
nfirstpos[-2] += nfirstpos[-1];
@@ -1054,7 +1369,7 @@ reganalyze(r, searchflag)
firstpos += nfirstpos[-1];
--nfirstpos;
- /* The lastpos of a _CAT node is the lastpos of the second argument,
+ /* The lastpos of a CAT node is the lastpos of the second argument,
union that of the first argument if the second is nullable. */
if (nullable[-1])
nlastpos[-2] += nlastpos[-1];
@@ -1068,12 +1383,13 @@ reganalyze(r, searchflag)
}
--nlastpos;
- /* A _CAT node is nullable if both arguments are nullable. */
+ /* A CAT node is nullable if both arguments are nullable. */
nullable[-2] = nullable[-1] && nullable[-2];
--nullable;
break;
- case _OR:
+ case OR:
+ case ORTOP:
/* The firstpos is the union of the firstpos of each argument. */
nfirstpos[-2] += nfirstpos[-1];
--nfirstpos;
@@ -1082,7 +1398,7 @@ reganalyze(r, searchflag)
nlastpos[-2] += nlastpos[-1];
--nlastpos;
- /* An _OR node is nullable if either argument is nullable. */
+ /* An OR node is nullable if either argument is nullable. */
nullable[-2] = nullable[-1] || nullable[-2];
--nullable;
break;
@@ -1093,36 +1409,36 @@ reganalyze(r, searchflag)
an "epsilon closure" effectively makes them nullable later.
Backreferences have to get a real position so we can detect
transitions on them later. But they are nullable. */
- *nullable++ = r->tokens[i] == _BACKREF;
+ *nullable++ = d->tokens[i] == BACKREF;
/* This position is in its own firstpos and lastpos. */
*nfirstpos++ = *nlastpos++ = 1;
--firstpos, --lastpos;
firstpos->index = lastpos->index = i;
- firstpos->constraint = lastpos->constraint = _NO_CONSTRAINT;
+ firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
/* Allocate the follow set for this position. */
nalloc[i] = 1;
- MALLOC(r->follows[i].elems, _position, nalloc[i]);
+ MALLOC(d->follows[i].elems, position, nalloc[i]);
break;
}
#ifdef DEBUG
/* ... balance the above nonsyntactic #ifdef goo... */
fprintf(stderr, "node %d:", i);
- prtok(r->tokens[i]);
+ prtok(d->tokens[i]);
putc('\n', stderr);
fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
fprintf(stderr, " firstpos:");
for (j = nfirstpos[-1] - 1; j >= 0; --j)
{
fprintf(stderr, " %d:", firstpos[j].index);
- prtok(r->tokens[firstpos[j].index]);
+ prtok(d->tokens[firstpos[j].index]);
}
fprintf(stderr, "\n lastpos:");
for (j = nlastpos[-1] - 1; j >= 0; --j)
{
fprintf(stderr, " %d:", lastpos[j].index);
- prtok(r->tokens[lastpos[j].index]);
+ prtok(d->tokens[lastpos[j].index]);
}
putc('\n', stderr);
}
@@ -1130,26 +1446,26 @@ reganalyze(r, searchflag)
/* For each follow set that is the follow set of a real position, replace
it with its epsilon closure. */
- for (i = 0; i < r->tindex; ++i)
- if (r->tokens[i] < _NOTCHAR || r->tokens[i] == _BACKREF
- || r->tokens[i] >= _SET)
+ for (i = 0; i < d->tindex; ++i)
+ if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
+ || d->tokens[i] >= CSET)
{
#ifdef DEBUG
fprintf(stderr, "follows(%d:", i);
- prtok(r->tokens[i]);
+ prtok(d->tokens[i]);
fprintf(stderr, "):");
- for (j = r->follows[i].nelem - 1; j >= 0; --j)
+ for (j = d->follows[i].nelem - 1; j >= 0; --j)
{
- fprintf(stderr, " %d:", r->follows[i].elems[j].index);
- prtok(r->tokens[r->follows[i].elems[j].index]);
+ fprintf(stderr, " %d:", d->follows[i].elems[j].index);
+ prtok(d->tokens[d->follows[i].elems[j].index]);
}
putc('\n', stderr);
#endif
- copy(&r->follows[i], &merged);
- epsclosure(&merged, r);
- if (r->follows[i].nelem < merged.nelem)
- REALLOC(r->follows[i].elems, _position, merged.nelem);
- copy(&merged, &r->follows[i]);
+ copy(&d->follows[i], &merged);
+ epsclosure(&merged, d);
+ if (d->follows[i].nelem < merged.nelem)
+ REALLOC(d->follows[i].elems, position, merged.nelem);
+ copy(&merged, &d->follows[i]);
}
/* Get the epsilon closure of the firstpos of the regexp. The result will
@@ -1157,19 +1473,19 @@ reganalyze(r, searchflag)
merged.nelem = 0;
for (i = 0; i < nfirstpos[-1]; ++i)
insert(firstpos[i], &merged);
- epsclosure(&merged, r);
+ epsclosure(&merged, d);
/* Check if any of the positions of state 0 will want newline context. */
wants_newline = 0;
for (i = 0; i < merged.nelem; ++i)
- if (_PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
+ if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
wants_newline = 1;
/* Build the initial state. */
- r->salloc = 1;
- r->sindex = 0;
- MALLOC(r->states, _dfa_state, r->salloc);
- state_index(r, &merged, wants_newline, 0);
+ d->salloc = 1;
+ d->sindex = 0;
+ MALLOC(d->states, dfa_state, d->salloc);
+ state_index(d, &merged, wants_newline, 0);
free(o_nullable);
free(o_nfirst);
@@ -1179,8 +1495,8 @@ reganalyze(r, searchflag)
free(nalloc);
free(merged.elems);
}
-
-/* Find, for each character, the transition out of state s of r, and store
+
+/* Find, for each character, the transition out of state s of d, and store
it in the appropriate slot of trans.
We divide the positions of s into groups (positions can appear in more
@@ -1211,52 +1527,52 @@ reganalyze(r, searchflag)
create a new group labeled with the characters of C and insert this
position in that group. */
void
-regstate(s, r, trans)
+dfastate(s, d, trans)
int s;
- struct regexp *r;
+ struct dfa *d;
int trans[];
{
- _position_set grps[_NOTCHAR]; /* As many as will ever be needed. */
- _charset labels[_NOTCHAR]; /* Labels corresponding to the groups. */
+ position_set grps[NOTCHAR]; /* As many as will ever be needed. */
+ charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */
int ngrps = 0; /* Number of groups actually used. */
- _position pos; /* Current position being considered. */
- _charset matches; /* Set of matching characters. */
+ position pos; /* Current position being considered. */
+ charclass matches; /* Set of matching characters. */
int matchesf; /* True if matches is nonempty. */
- _charset intersect; /* Intersection with some label set. */
+ charclass intersect; /* Intersection with some label set. */
int intersectf; /* True if intersect is nonempty. */
- _charset leftovers; /* Stuff in the label that didn't match. */
+ charclass leftovers; /* Stuff in the label that didn't match. */
int leftoversf; /* True if leftovers is nonempty. */
- static _charset letters; /* Set of characters considered letters. */
- static _charset newline; /* Set of characters that aren't newline. */
- _position_set follows; /* Union of the follows of some group. */
- _position_set tmp; /* Temporary space for merging sets. */
+ static charclass letters; /* Set of characters considered letters. */
+ static charclass newline; /* Set of characters that aren't newline. */
+ position_set follows; /* Union of the follows of some group. */
+ position_set tmp; /* Temporary space for merging sets. */
int state; /* New state. */
int wants_newline; /* New state wants to know newline context. */
int state_newline; /* New state on a newline transition. */
int wants_letter; /* New state wants to know letter context. */
int state_letter; /* New state on a letter transition. */
- static initialized; /* Flag for static initialization. */
+ static int initialized; /* Flag for static initialization. */
int i, j, k;
/* Initialize the set of letters, if necessary. */
if (! initialized)
{
initialized = 1;
- for (i = 0; i < _NOTCHAR; ++i)
- if (ISALNUM(i))
+ for (i = 0; i < NOTCHAR; ++i)
+ if (IS_WORD_CONSTITUENT(i))
setbit(i, letters);
setbit('\n', newline);
}
zeroset(matches);
- for (i = 0; i < r->states[s].elems.nelem; ++i)
+ for (i = 0; i < d->states[s].elems.nelem; ++i)
{
- pos = r->states[s].elems.elems[i];
- if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR)
- setbit(r->tokens[pos.index], matches);
- else if (r->tokens[pos.index] >= _SET)
- copyset(r->charsets[r->tokens[pos.index] - _SET], matches);
+ pos = d->states[s].elems.elems[i];
+ if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
+ setbit(d->tokens[pos.index], matches);
+ else if (d->tokens[pos.index] >= CSET)
+ copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
else
continue;
@@ -1264,26 +1580,26 @@ regstate(s, r, trans)
they fail in the current context. */
if (pos.constraint != 0xFF)
{
- if (! _MATCHES_NEWLINE_CONTEXT(pos.constraint,
- r->states[s].newline, 1))
+ if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ d->states[s].newline, 1))
clrbit('\n', matches);
- if (! _MATCHES_NEWLINE_CONTEXT(pos.constraint,
- r->states[s].newline, 0))
- for (j = 0; j < _CHARSET_INTS; ++j)
+ if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ d->states[s].newline, 0))
+ for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= newline[j];
- if (! _MATCHES_LETTER_CONTEXT(pos.constraint,
- r->states[s].letter, 1))
- for (j = 0; j < _CHARSET_INTS; ++j)
+ if (! MATCHES_LETTER_CONTEXT(pos.constraint,
+ d->states[s].letter, 1))
+ for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= ~letters[j];
- if (! _MATCHES_LETTER_CONTEXT(pos.constraint,
- r->states[s].letter, 0))
- for (j = 0; j < _CHARSET_INTS; ++j)
+ if (! MATCHES_LETTER_CONTEXT(pos.constraint,
+ d->states[s].letter, 0))
+ for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= letters[j];
/* If there are no characters left, there's no point in going on. */
- for (j = 0; j < _CHARSET_INTS && !matches[j]; ++j)
- ;
- if (j == _CHARSET_INTS)
+ for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
+ continue;
+ if (j == CHARCLASS_INTS)
continue;
}
@@ -1292,27 +1608,27 @@ regstate(s, r, trans)
/* If matches contains a single character only, and the current
group's label doesn't contain that character, go on to the
next group. */
- if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR
- && !tstbit(r->tokens[pos.index], labels[j]))
+ if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
+ && !tstbit(d->tokens[pos.index], labels[j]))
continue;
/* Check if this group's label has a nonempty intersection with
matches. */
intersectf = 0;
- for (k = 0; k < _CHARSET_INTS; ++k)
- (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0;
+ for (k = 0; k < CHARCLASS_INTS; ++k)
+ (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
if (! intersectf)
continue;
/* It does; now find the set differences both ways. */
leftoversf = matchesf = 0;
- for (k = 0; k < _CHARSET_INTS; ++k)
+ for (k = 0; k < CHARCLASS_INTS; ++k)
{
/* Even an optimizing compiler can't know this for sure. */
int match = matches[k], label = labels[j][k];
- (leftovers[k] = ~match & label) ? leftoversf = 1 : 0;
- (matches[k] = match & ~label) ? matchesf = 1 : 0;
+ (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
+ (matches[k] = match & ~label) ? (matchesf = 1) : 0;
}
/* If there were leftovers, create a new group labeled with them. */
@@ -1320,7 +1636,7 @@ regstate(s, r, trans)
{
copyset(leftovers, labels[ngrps]);
copyset(intersect, labels[j]);
- MALLOC(grps[ngrps].elems, _position, r->nleaves);
+ MALLOC(grps[ngrps].elems, position, d->nleaves);
copy(&grps[j], &grps[ngrps]);
++ngrps;
}
@@ -1341,50 +1657,46 @@ regstate(s, r, trans)
{
copyset(matches, labels[ngrps]);
zeroset(matches);
- MALLOC(grps[ngrps].elems, _position, r->nleaves);
+ MALLOC(grps[ngrps].elems, position, d->nleaves);
grps[ngrps].nelem = 1;
grps[ngrps].elems[0] = pos;
++ngrps;
}
}
- MALLOC(follows.elems, _position, r->nleaves);
- MALLOC(tmp.elems, _position, r->nleaves);
+ MALLOC(follows.elems, position, d->nleaves);
+ MALLOC(tmp.elems, position, d->nleaves);
/* If we are a searching matcher, the default transition is to a state
containing the positions of state 0, otherwise the default transition
is to fail miserably. */
- if (r->searchflag)
+ if (d->searchflag)
{
wants_newline = 0;
wants_letter = 0;
- for (i = 0; i < r->states[0].elems.nelem; ++i)
+ for (i = 0; i < d->states[0].elems.nelem; ++i)
{
- if (_PREV_NEWLINE_DEPENDENT(r->states[0].elems.elems[i].constraint))
+ if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint))
wants_newline = 1;
- if (_PREV_LETTER_DEPENDENT(r->states[0].elems.elems[i].constraint))
+ if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint))
wants_letter = 1;
}
- copy(&r->states[0].elems, &follows);
- state = state_index(r, &follows, 0, 0);
+ copy(&d->states[0].elems, &follows);
+ state = state_index(d, &follows, 0, 0);
if (wants_newline)
- state_newline = state_index(r, &follows, 1, 0);
+ state_newline = state_index(d, &follows, 1, 0);
else
state_newline = state;
if (wants_letter)
- state_letter = state_index(r, &follows, 0, 1);
+ state_letter = state_index(d, &follows, 0, 1);
else
state_letter = state;
- for (i = 0; i < _NOTCHAR; ++i)
- if (i == '\n')
- trans[i] = state_newline;
- else if (ISALNUM(i))
- trans[i] = state_letter;
- else
- trans[i] = state;
+ for (i = 0; i < NOTCHAR; ++i)
+ trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
+ trans['\n'] = state_newline;
}
else
- for (i = 0; i < _NOTCHAR; ++i)
+ for (i = 0; i < NOTCHAR; ++i)
trans[i] = -1;
for (i = 0; i < ngrps; ++i)
@@ -1394,44 +1706,44 @@ regstate(s, r, trans)
/* Find the union of the follows of the positions of the group.
This is a hideously inefficient loop. Fix it someday. */
for (j = 0; j < grps[i].nelem; ++j)
- for (k = 0; k < r->follows[grps[i].elems[j].index].nelem; ++k)
- insert(r->follows[grps[i].elems[j].index].elems[k], &follows);
+ for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
+ insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
- if (r->searchflag)
- for (j = 0; j < r->states[0].elems.nelem; ++j)
- insert(r->states[0].elems.elems[j], &follows);
+ if (d->searchflag)
+ for (j = 0; j < d->states[0].elems.nelem; ++j)
+ insert(d->states[0].elems.elems[j], &follows);
/* Find out if the new state will want any context information. */
wants_newline = 0;
if (tstbit('\n', labels[i]))
for (j = 0; j < follows.nelem; ++j)
- if (_PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
+ if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
wants_newline = 1;
wants_letter = 0;
- for (j = 0; j < _CHARSET_INTS; ++j)
+ for (j = 0; j < CHARCLASS_INTS; ++j)
if (labels[i][j] & letters[j])
break;
- if (j < _CHARSET_INTS)
+ if (j < CHARCLASS_INTS)
for (j = 0; j < follows.nelem; ++j)
- if (_PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
+ if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
wants_letter = 1;
/* Find the state(s) corresponding to the union of the follows. */
- state = state_index(r, &follows, 0, 0);
+ state = state_index(d, &follows, 0, 0);
if (wants_newline)
- state_newline = state_index(r, &follows, 1, 0);
+ state_newline = state_index(d, &follows, 1, 0);
else
state_newline = state;
if (wants_letter)
- state_letter = state_index(r, &follows, 0, 1);
+ state_letter = state_index(d, &follows, 0, 1);
else
state_letter = state;
/* Set the transitions for each character in the current label. */
- for (j = 0; j < _CHARSET_INTS; ++j)
+ for (j = 0; j < CHARCLASS_INTS; ++j)
for (k = 0; k < INTBITS; ++k)
if (labels[i][j] & 1 << k)
{
@@ -1439,9 +1751,9 @@ regstate(s, r, trans)
if (c == '\n')
trans[c] = state_newline;
- else if (ISALNUM(c))
+ else if (IS_WORD_CONSTITUENT(c))
trans[c] = state_letter;
- else if (c < _NOTCHAR)
+ else if (c < NOTCHAR)
trans[c] = state;
}
}
@@ -1451,18 +1763,18 @@ regstate(s, r, trans)
free(follows.elems);
free(tmp.elems);
}
-
-/* Some routines for manipulating a compiled regexp's transition tables.
+
+/* Some routines for manipulating a compiled dfa's transition tables.
Each state may or may not have a transition table; if it does, and it
- is a non-accepting state, then r->trans[state] points to its table.
- If it is an accepting state then r->fails[state] points to its table.
- If it has no table at all, then r->trans[state] is NULL.
+ is a non-accepting state, then d->trans[state] points to its table.
+ If it is an accepting state then d->fails[state] points to its table.
+ If it has no table at all, then d->trans[state] is NULL.
TODO: Improve this comment, get rid of the unnecessary redundancy. */
static void
-build_state(s, r)
+build_state(s, d)
int s;
- struct regexp *r;
+ struct dfa *d;
{
int *trans; /* The new transition table. */
int i;
@@ -1471,87 +1783,87 @@ build_state(s, r)
exist at once. 1024 is arbitrary. The idea is that the frequently
used transition tables will be quickly rebuilt, whereas the ones that
were only needed once or twice will be cleared away. */
- if (r->trcount >= 1024)
+ if (d->trcount >= 1024)
{
- for (i = 0; i < r->tralloc; ++i)
- if (r->trans[i])
+ for (i = 0; i < d->tralloc; ++i)
+ if (d->trans[i])
{
- free((ptr_t) r->trans[i]);
- r->trans[i] = NULL;
+ free((ptr_t) d->trans[i]);
+ d->trans[i] = NULL;
}
- else if (r->fails[i])
+ else if (d->fails[i])
{
- free((ptr_t) r->fails[i]);
- r->fails[i] = NULL;
+ free((ptr_t) d->fails[i]);
+ d->fails[i] = NULL;
}
- r->trcount = 0;
+ d->trcount = 0;
}
- ++r->trcount;
+ ++d->trcount;
/* Set up the success bits for this state. */
- r->success[s] = 0;
- if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 1, r->states[s].letter, 0,
- s, *r))
- r->success[s] |= 4;
- if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 1,
- s, *r))
- r->success[s] |= 2;
- if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 0,
- s, *r))
- r->success[s] |= 1;
-
- MALLOC(trans, int, _NOTCHAR);
- regstate(s, r, trans);
+ d->success[s] = 0;
+ if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0,
+ s, *d))
+ d->success[s] |= 4;
+ if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1,
+ s, *d))
+ d->success[s] |= 2;
+ if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0,
+ s, *d))
+ d->success[s] |= 1;
+
+ MALLOC(trans, int, NOTCHAR);
+ dfastate(s, d, trans);
/* Now go through the new transition table, and make sure that the trans
and fail arrays are allocated large enough to hold a pointer for the
largest state mentioned in the table. */
- for (i = 0; i < _NOTCHAR; ++i)
- if (trans[i] >= r->tralloc)
+ for (i = 0; i < NOTCHAR; ++i)
+ if (trans[i] >= d->tralloc)
{
- int oldalloc = r->tralloc;
-
- while (trans[i] >= r->tralloc)
- r->tralloc *= 2;
- REALLOC(r->realtrans, int *, r->tralloc + 1);
- r->trans = r->realtrans + 1;
- REALLOC(r->fails, int *, r->tralloc);
- REALLOC(r->success, int, r->tralloc);
- REALLOC(r->newlines, int, r->tralloc);
- while (oldalloc < r->tralloc)
+ int oldalloc = d->tralloc;
+
+ while (trans[i] >= d->tralloc)
+ d->tralloc *= 2;
+ REALLOC(d->realtrans, int *, d->tralloc + 1);
+ d->trans = d->realtrans + 1;
+ REALLOC(d->fails, int *, d->tralloc);
+ REALLOC(d->success, int, d->tralloc);
+ REALLOC(d->newlines, int, d->tralloc);
+ while (oldalloc < d->tralloc)
{
- r->trans[oldalloc] = NULL;
- r->fails[oldalloc++] = NULL;
+ d->trans[oldalloc] = NULL;
+ d->fails[oldalloc++] = NULL;
}
}
/* Keep the newline transition in a special place so we can use it as
a sentinel. */
- r->newlines[s] = trans['\n'];
+ d->newlines[s] = trans['\n'];
trans['\n'] = -1;
- if (ACCEPTING(s, *r))
- r->fails[s] = trans;
+ if (ACCEPTING(s, *d))
+ d->fails[s] = trans;
else
- r->trans[s] = trans;
+ d->trans[s] = trans;
}
static void
-build_state_zero(r)
- struct regexp *r;
+build_state_zero(d)
+ struct dfa *d;
{
- r->tralloc = 1;
- r->trcount = 0;
- CALLOC(r->realtrans, int *, r->tralloc + 1);
- r->trans = r->realtrans + 1;
- CALLOC(r->fails, int *, r->tralloc);
- MALLOC(r->success, int, r->tralloc);
- MALLOC(r->newlines, int, r->tralloc);
- build_state(0, r);
+ d->tralloc = 1;
+ d->trcount = 0;
+ CALLOC(d->realtrans, int *, d->tralloc + 1);
+ d->trans = d->realtrans + 1;
+ CALLOC(d->fails, int *, d->tralloc);
+ MALLOC(d->success, int, d->tralloc);
+ MALLOC(d->newlines, int, d->tralloc);
+ build_state(0, d);
}
-
-/* Search through a buffer looking for a match to the given struct regexp.
+
+/* Search through a buffer looking for a match to the given struct dfa.
Find the first occurrence of a string matching the regexp in the buffer,
and the shortest possible version thereof. Return a pointer to the first
character after the match, or NULL if none is found. Begin points to
@@ -1565,74 +1877,61 @@ build_state_zero(r)
match needs to be verified by a backtracking matcher. Otherwise
we store a 0 in *backref. */
char *
-regexecute(r, begin, end, newline, count, backref)
- struct regexp *r;
+dfaexec(d, begin, end, newline, count, backref)
+ struct dfa *d;
char *begin;
char *end;
int newline;
int *count;
int *backref;
{
- register s, s1, tmp; /* Current state. */
+ register int s, s1, tmp; /* Current state. */
register unsigned char *p; /* Current input character. */
- register **trans, *t; /* Copy of r->trans so it can be optimized
+ register int **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
- static sbit[_NOTCHAR]; /* Table for anding with r->success. */
- static sbit_init;
+ static int sbit[NOTCHAR]; /* Table for anding with d->success. */
+ static int sbit_init;
if (! sbit_init)
{
int i;
sbit_init = 1;
- for (i = 0; i < _NOTCHAR; ++i)
- if (i == '\n')
- sbit[i] = 4;
- else if (ISALNUM(i))
- sbit[i] = 2;
- else
- sbit[i] = 1;
+ for (i = 0; i < NOTCHAR; ++i)
+ sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
+ sbit['\n'] = 4;
}
- if (! r->tralloc)
- build_state_zero(r);
+ if (! d->tralloc)
+ build_state_zero(d);
- s = 0;
+ s = s1 = 0;
p = (unsigned char *) begin;
- trans = r->trans;
+ trans = d->trans;
*end = '\n';
for (;;)
{
- /* The dreaded inner loop. */
- if (t = trans[s])
- do
- {
- s1 = t[*p++];
- if (! (t = trans[s1]))
- goto last_was_s;
- s = t[*p++];
- }
- while (t = trans[s]);
- goto last_was_s1;
- last_was_s:
- tmp = s, s = s1, s1 = tmp;
- last_was_s1:
+ while ((t = trans[s]) != 0) { /* hand-optimized loop */
+ s1 = t[*p++];
+ if ((t = trans[s1]) == 0) {
+ tmp = s ; s = s1 ; s1 = tmp ; /* swap */
+ break;
+ }
+ s = t[*p++];
+ }
- if (s >= 0 && p <= (unsigned char *) end && r->fails[s])
+ if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
{
- if (r->success[s] & sbit[*p])
+ if (d->success[s] & sbit[*p])
{
if (backref)
- if (r->states[s].backref)
- *backref = 1;
- else
- *backref = 0;
+ *backref = (d->states[s].backref != 0);
return (char *) p;
}
s1 = s;
- s = r->fails[s][*p++];
+ s = d->fails[s][*p++];
continue;
}
@@ -1641,636 +1940,664 @@ regexecute(r, begin, end, newline, count, backref)
++*count;
/* Check if we've run off the end of the buffer. */
- if ((char *) p >= end)
+ if ((char *) p > end)
return NULL;
if (s >= 0)
{
- build_state(s, r);
- trans = r->trans;
+ build_state(s, d);
+ trans = d->trans;
continue;
}
if (p[-1] == '\n' && newline)
{
- s = r->newlines[s1];
+ s = d->newlines[s1];
continue;
}
s = 0;
}
}
-
-/* Initialize the components of a regexp that the other routines don't
+
+/* Initialize the components of a dfa that the other routines don't
initialize for themselves. */
void
-reginit(r)
- struct regexp *r;
+dfainit(d)
+ struct dfa *d;
{
- r->calloc = 1;
- MALLOC(r->charsets, _charset, r->calloc);
- r->cindex = 0;
+ d->calloc = 1;
+ MALLOC(d->charclasses, charclass, d->calloc);
+ d->cindex = 0;
- r->talloc = 1;
- MALLOC(r->tokens, _token, r->talloc);
- r->tindex = r->depth = r->nleaves = r->nregexps = 0;
+ d->talloc = 1;
+ MALLOC(d->tokens, token, d->talloc);
+ d->tindex = d->depth = d->nleaves = d->nregexps = 0;
- r->searchflag = 0;
- r->tralloc = 0;
+ d->searchflag = 0;
+ d->tralloc = 0;
+
+ d->musts = 0;
}
/* Parse and analyze a single string of the given length. */
void
-regcompile(s, len, r, searchflag)
- const char *s;
+dfacomp(s, len, d, searchflag)
+ char *s;
size_t len;
- struct regexp *r;
+ struct dfa *d;
int searchflag;
{
- if (case_fold) /* dummy folding in service of regmust() */
+ if (case_fold) /* dummy folding in service of dfamust() */
{
- char *copy;
+ char *lcopy;
int i;
- copy = malloc(len);
- if (!copy)
- regerror("out of memory");
-
- /* This is a complete kludge and could potentially break
- \<letter> escapes . . . */
+ lcopy = malloc(len);
+ if (!lcopy)
+ dfaerror(_("out of memory"));
+
+ /* This is a kludge. */
case_fold = 0;
for (i = 0; i < len; ++i)
- if (ISUPPER(s[i]))
- copy[i] = tolower(s[i]);
+ if (ISUPPER ((unsigned char) s[i]))
+ lcopy[i] = tolower ((unsigned char) s[i]);
else
- copy[i] = s[i];
-
- reginit(r);
- r->mustn = 0;
- r->must[0] = '\0';
- regparse(copy, len, r);
- free(copy);
- regmust(r);
- reganalyze(r, searchflag);
+ lcopy[i] = s[i];
+
+ dfainit(d);
+ dfaparse(lcopy, len, d);
+ free(lcopy);
+ dfamust(d);
+ d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
case_fold = 1;
- reginit(r);
- regparse(s, len, r);
- reganalyze(r, searchflag);
+ dfaparse(s, len, d);
+ dfaanalyze(d, searchflag);
}
else
{
- reginit(r);
- regparse(s, len, r);
- regmust(r);
- reganalyze(r, searchflag);
+ dfainit(d);
+ dfaparse(s, len, d);
+ dfamust(d);
+ dfaanalyze(d, searchflag);
}
}
-/* Free the storage held by the components of a regexp. */
+/* Free the storage held by the components of a dfa. */
void
-regfree(r)
- struct regexp *r;
+dfafree(d)
+ struct dfa *d;
{
int i;
-
- free((ptr_t) r->charsets);
- free((ptr_t) r->tokens);
- for (i = 0; i < r->sindex; ++i)
- free((ptr_t) r->states[i].elems.elems);
- free((ptr_t) r->states);
- for (i = 0; i < r->tindex; ++i)
- if (r->follows[i].elems)
- free((ptr_t) r->follows[i].elems);
- free((ptr_t) r->follows);
- for (i = 0; i < r->tralloc; ++i)
- if (r->trans[i])
- free((ptr_t) r->trans[i]);
- else if (r->fails[i])
- free((ptr_t) r->fails[i]);
- free((ptr_t) r->realtrans);
- free((ptr_t) r->fails);
- free((ptr_t) r->newlines);
+ struct dfamust *dm, *ndm;
+
+ free((ptr_t) d->charclasses);
+ free((ptr_t) d->tokens);
+ for (i = 0; i < d->sindex; ++i)
+ free((ptr_t) d->states[i].elems.elems);
+ free((ptr_t) d->states);
+ for (i = 0; i < d->tindex; ++i)
+ if (d->follows[i].elems)
+ free((ptr_t) d->follows[i].elems);
+ free((ptr_t) d->follows);
+ for (i = 0; i < d->tralloc; ++i)
+ if (d->trans[i])
+ free((ptr_t) d->trans[i]);
+ else if (d->fails[i])
+ free((ptr_t) d->fails[i]);
+ if (d->realtrans) free((ptr_t) d->realtrans);
+ if (d->fails) free((ptr_t) d->fails);
+ if (d->newlines) free((ptr_t) d->newlines);
+ if (d->success) free((ptr_t) d->success);
+ for (dm = d->musts; dm; dm = ndm)
+ {
+ ndm = dm->next;
+ free(dm->must);
+ free((ptr_t) dm);
+ }
}
-/*
-Having found the postfix representation of the regular expression,
-try to find a long sequence of characters that must appear in any line
-containing the r.e.
-Finding a "longest" sequence is beyond the scope here;
-we take an easy way out and hope for the best.
-(Take "(ab|a)b"--please.)
-
-We do a bottom-up calculation of sequences of characters that must appear
-in matches of r.e.'s represented by trees rooted at the nodes of the postfix
-representation:
+/* Having found the postfix representation of the regular expression,
+ try to find a long sequence of characters that must appear in any line
+ containing the r.e.
+ Finding a "longest" sequence is beyond the scope here;
+ we take an easy way out and hope for the best.
+ (Take "(ab|a)b"--please.)
+
+ We do a bottom-up calculation of sequences of characters that must appear
+ in matches of r.e.'s represented by trees rooted at the nodes of the postfix
+ representation:
sequences that must appear at the left of the match ("left")
sequences that must appear at the right of the match ("right")
lists of sequences that must appear somewhere in the match ("in")
sequences that must constitute the match ("is")
-When we get to the root of the tree, we use one of the longest of its
-calculated "in" sequences as our answer. The sequence we find is returned in
-r->must (where "r" is the single argument passed to "regmust");
-the length of the sequence is returned in r->mustn.
-The sequences calculated for the various types of node (in pseudo ANSI c)
-are shown below. "p" is the operand of unary operators (and the left-hand
-operand of binary operators); "q" is the right-hand operand of binary operators
-.
-"ZERO" means "a zero-length sequence" below.
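+ When we get to the root of the tree, we use one of the longest of its
+ calculated "in" sequences as our answer. The sequence we find is copied
+ into a newly allocated struct dfamust that is pushed onto the front of
+ d->musts (where "d" is the single argument passed to "dfamust"); its
+ "exact" flag is set when the sequence equals the root's "is" sequence.
+ Nothing is recorded if the sequence is empty.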
-Type left right is in
----- ---- ----- -- --
-char c # c # c # c # c
+ The sequences calculated for the various types of node (in pseudo ANSI c)
+ are shown below. "p" is the operand of unary operators (and the left-hand
+ operand of binary operators); "q" is the right-hand operand of binary
+ operators.
-SET ZERO ZERO ZERO ZERO
+ "ZERO" means "a zero-length sequence" below.
-STAR ZERO ZERO ZERO ZERO
+ Type left right is in
+ ---- ---- ----- -- --
+ char c # c # c # c # c
-QMARK ZERO ZERO ZERO ZERO
+ CSET ZERO ZERO ZERO ZERO
-PLUS p->left p->right ZERO p->in
+ STAR ZERO ZERO ZERO ZERO
-CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus
- p->left : q->right : q->is!=ZERO) ? q->in plus
- p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
- ZERO
+ QMARK ZERO ZERO ZERO ZERO
-OR longest common longest common (do p->is and substrings common to
- leading trailing q->is have same p->in and q->in
- (sub)sequence (sub)sequence length and
- of p->left of p->right content) ?
- and q->left and q->right p->is : NULL
+ PLUS p->left p->right ZERO p->in
-If there's anything else we recognize in the tree, all four sequences get set
-to zero-length sequences. If there's something we don't recognize in the tree,
-we just return a zero-length sequence.
+ CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus
+ p->left : q->right : q->is!=ZERO) ? q->in plus
+ p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
+ ZERO
-Break ties in favor of infrequent letters (choosing 'zzz' in preference to
-'aaa')?
+ OR longest common longest common (do p->is and substrings common to
+ leading trailing q->is have same p->in and q->in
+ (sub)sequence (sub)sequence length and
+ of p->left of p->right content) ?
+ and q->left and q->right p->is : NULL
-And. . .is it here or someplace that we might ponder "optimizations" such as
+ If there's anything else we recognize in the tree, all four sequences get set
+ to zero-length sequences. If there's something we don't recognize in the tree,
+ we just return a zero-length sequence.
+
+ Break ties in favor of infrequent letters (choosing 'zzz' in preference to
+ 'aaa')?
+
+ And. . .is it here or someplace that we might ponder "optimizations" such as
egrep 'psi|epsilon' -> egrep 'psi'
egrep 'pepsi|epsilon' -> egrep 'epsi'
(Yes, we now find "epsi" as a "string
that must occur", but we might also
- simplify the *entire* r.e. being sought
-)
+ simplify the *entire* r.e. being sought)
grep '[c]' -> grep 'c'
grep '(ab|a)b' -> grep 'ab'
grep 'ab*' -> grep 'a'
grep 'a*b' -> grep 'b'
-There are several issues:
- Is optimization easy (enough)?
- Does optimization actually accomplish anything,
- or is the automaton you get from "psi|epsilon" (for example)
- the same as the one you get from "psi" (for example)?
+ There are several issues:
+
+ Is optimization easy (enough)?
+
+ Does optimization actually accomplish anything,
+ or is the automaton you get from "psi|epsilon" (for example)
+ the same as the one you get from "psi" (for example)?
- Are optimizable r.e.'s likely to be used in real-life situations
- (something like 'ab*' is probably unlikely; something like is
- 'psi|epsilon' is likelier)?
-*/
+ Are optimizable r.e.'s likely to be used in real-life situations
+ (something like 'ab*' is probably unlikely; something like
+ 'psi|epsilon' is likelier)? */
static char *
icatalloc(old, new)
-char * old;
-char * new;
+ char *old;
+ char *new;
{
- register char * result;
- register int oldsize, newsize;
-
- newsize = (new == NULL) ? 0 : strlen(new);
- if (old == NULL)
- oldsize = 0;
- else if (newsize == 0)
- return old;
- else oldsize = strlen(old);
- if (old == NULL)
- result = (char *) malloc(newsize + 1);
- else result = (char *) realloc((void *) old, oldsize + newsize + 1);
- if (result != NULL && new != NULL)
- (void) strcpy(result + oldsize, new);
- return result;
+ char *result;
+ size_t oldsize, newsize;
+
+ newsize = (new == NULL) ? 0 : strlen(new);
+ if (old == NULL)
+ oldsize = 0;
+ else if (newsize == 0)
+ return old;
+ else oldsize = strlen(old);
+ if (old == NULL)
+ result = (char *) malloc(newsize + 1);
+ else
+ result = (char *) realloc((void *) old, oldsize + newsize + 1);
+ if (result != NULL && new != NULL)
+ (void) strcpy(result + oldsize, new);
+ return result;
}
static char *
icpyalloc(string)
-const char * string;
+ char *string;
{
- return icatalloc((char *) NULL, string);
+ return icatalloc((char *) NULL, string);
}
static char *
istrstr(lookin, lookfor)
-char * lookin;
-register char * lookfor;
+ char *lookin;
+ char *lookfor;
{
- register char * cp;
- register int len;
-
- len = strlen(lookfor);
- for (cp = lookin; *cp != '\0'; ++cp)
- if (strncmp(cp, lookfor, len) == 0)
- return cp;
- return NULL;
+ char *cp;
+ size_t len;
+
+ len = strlen(lookfor);
+ for (cp = lookin; *cp != '\0'; ++cp)
+ if (strncmp(cp, lookfor, len) == 0)
+ return cp;
+ return NULL;
}
static void
ifree(cp)
-char * cp;
+ char *cp;
{
- if (cp != NULL)
- free(cp);
+ if (cp != NULL)
+ free(cp);
}
static void
freelist(cpp)
-register char ** cpp;
+ char **cpp;
{
- register int i;
+ int i;
- if (cpp == NULL)
- return;
- for (i = 0; cpp[i] != NULL; ++i) {
- free(cpp[i]);
- cpp[i] = NULL;
- }
+ if (cpp == NULL)
+ return;
+ for (i = 0; cpp[i] != NULL; ++i)
+ {
+ free(cpp[i]);
+ cpp[i] = NULL;
+ }
}
static char **
enlist(cpp, new, len)
-register char ** cpp;
-register char * new;
-int len;
+ char **cpp;
+ char *new;
+ size_t len;
{
- register int i, j;
+ int i, j;
- if (cpp == NULL)
- return NULL;
- if ((new = icpyalloc(new)) == NULL) {
- freelist(cpp);
- return NULL;
- }
- new[len] = '\0';
- /*
- ** Is there already something in the list that's new (or longer)?
- */
- for (i = 0; cpp[i] != NULL; ++i)
- if (istrstr(cpp[i], new) != NULL) {
- free(new);
- return cpp;
- }
- /*
- ** Eliminate any obsoleted strings.
- */
- j = 0;
- while (cpp[j] != NULL)
- if (istrstr(new, cpp[j]) == NULL)
- ++j;
- else {
- free(cpp[j]);
- if (--i == j)
- break;
- cpp[j] = cpp[i];
- cpp[i] = 0;
- }
- /*
- ** Add the new string.
- */
- cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);
- if (cpp == NULL)
- return NULL;
- cpp[i] = new;
- cpp[i + 1] = NULL;
+ if (cpp == NULL)
+ return NULL;
+ if ((new = icpyalloc(new)) == NULL)
+ {
+ freelist(cpp);
+ return NULL;
+ }
+ new[len] = '\0';
+ /* Is there already something in the list that's new (or longer)? */
+ for (i = 0; cpp[i] != NULL; ++i)
+ if (istrstr(cpp[i], new) != NULL)
+ {
+ free(new);
return cpp;
+ }
+ /* Eliminate any obsoleted strings. */
+ j = 0;
+ while (cpp[j] != NULL)
+ if (istrstr(new, cpp[j]) == NULL)
+ ++j;
+ else
+ {
+ free(cpp[j]);
+ if (--i == j)
+ break;
+ cpp[j] = cpp[i];
+ cpp[i] = NULL;
+ }
+ /* Add the new string. */
+ cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);
+ if (cpp == NULL)
+ return NULL;
+ cpp[i] = new;
+ cpp[i + 1] = NULL;
+ return cpp;
}
-/*
-** Given pointers to two strings,
-** return a pointer to an allocated list of their distinct common substrings.
-** Return NULL if something seems wild.
-*/
-
+/* Given pointers to two strings, return a pointer to an allocated
+ list of their distinct common substrings. Return NULL if something
+ seems wild. */
static char **
comsubs(left, right)
-char * left;
-char * right;
+ char *left;
+ char *right;
{
- register char ** cpp;
- register char * lcp;
- register char * rcp;
- register int i, len;
-
- if (left == NULL || right == NULL)
- return NULL;
- cpp = (char **) malloc(sizeof *cpp);
- if (cpp == NULL)
- return NULL;
- cpp[0] = NULL;
- for (lcp = left; *lcp != '\0'; ++lcp) {
- len = 0;
- rcp = index(right, *lcp);
- while (rcp != NULL) {
- for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
- ;
- if (i > len)
- len = i;
- rcp = index(rcp + 1, *lcp);
- }
- if (len == 0)
- continue;
- if ((cpp = enlist(cpp, lcp, len)) == NULL)
- break;
+ char **cpp;
+ char *lcp;
+ char *rcp;
+ size_t i, len;
+
+ if (left == NULL || right == NULL)
+ return NULL;
+ cpp = (char **) malloc(sizeof *cpp);
+ if (cpp == NULL)
+ return NULL;
+ cpp[0] = NULL;
+ for (lcp = left; *lcp != '\0'; ++lcp)
+ {
+ len = 0;
+ rcp = index(right, *lcp);
+ while (rcp != NULL)
+ {
+ for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
+ continue;
+ if (i > len)
+ len = i;
+ rcp = index(rcp + 1, *lcp);
}
- return cpp;
+ if (len == 0)
+ continue;
+ if ((cpp = enlist(cpp, lcp, len)) == NULL)
+ break;
+ }
+ return cpp;
}
static char **
addlists(old, new)
-char ** old;
-char ** new;
+char **old;
+char **new;
{
- register int i;
-
- if (old == NULL || new == NULL)
- return NULL;
- for (i = 0; new[i] != NULL; ++i) {
- old = enlist(old, new[i], strlen(new[i]));
- if (old == NULL)
- break;
- }
- return old;
-}
+ int i;
-/*
-** Given two lists of substrings,
-** return a new list giving substrings common to both.
-*/
+ if (old == NULL || new == NULL)
+ return NULL;
+ for (i = 0; new[i] != NULL; ++i)
+ {
+ old = enlist(old, new[i], strlen(new[i]));
+ if (old == NULL)
+ break;
+ }
+ return old;
+}
+/* Given two lists of substrings, return a new list giving substrings
+ common to both. */
static char **
inboth(left, right)
-char ** left;
-char ** right;
+ char **left;
+ char **right;
{
- register char ** both;
- register char ** temp;
- register int lnum, rnum;
-
- if (left == NULL || right == NULL)
- return NULL;
- both = (char **) malloc(sizeof *both);
- if (both == NULL)
- return NULL;
- both[0] = NULL;
- for (lnum = 0; left[lnum] != NULL; ++lnum) {
- for (rnum = 0; right[rnum] != NULL; ++rnum) {
- temp = comsubs(left[lnum], right[rnum]);
- if (temp == NULL) {
- freelist(both);
- return NULL;
- }
- both = addlists(both, temp);
- freelist(temp);
- if (both == NULL)
- return NULL;
- }
+ char **both;
+ char **temp;
+ int lnum, rnum;
+
+ if (left == NULL || right == NULL)
+ return NULL;
+ both = (char **) malloc(sizeof *both);
+ if (both == NULL)
+ return NULL;
+ both[0] = NULL;
+ for (lnum = 0; left[lnum] != NULL; ++lnum)
+ {
+ for (rnum = 0; right[rnum] != NULL; ++rnum)
+ {
+ temp = comsubs(left[lnum], right[rnum]);
+ if (temp == NULL)
+ {
+ freelist(both);
+ return NULL;
+ }
+ both = addlists(both, temp);
+ freelist(temp);
+ free(temp);
+ if (both == NULL)
+ return NULL;
}
- return both;
+ }
+ return both;
}
-typedef struct {
- char ** in;
- char * left;
- char * right;
- char * is;
+typedef struct
+{
+ char **in;
+ char *left;
+ char *right;
+ char *is;
} must;
static void
resetmust(mp)
-register must * mp;
+must *mp;
{
- mp->left[0] = mp->right[0] = mp->is[0] = '\0';
- freelist(mp->in);
+ mp->left[0] = mp->right[0] = mp->is[0] = '\0';
+ freelist(mp->in);
}
static void
-regmust(reg)
-register struct regexp * reg;
+dfamust(dfa)
+struct dfa *dfa;
{
- register must * musts;
- register must * mp;
- register char * result;
- register int ri;
- register int i;
- register _token t;
- static must must0;
-
- reg->mustn = 0;
- reg->must[0] = '\0';
- musts = (must *) malloc((reg->tindex + 1) * sizeof *musts);
- if (musts == NULL)
- return;
- mp = musts;
- for (i = 0; i <= reg->tindex; ++i)
- mp[i] = must0;
- for (i = 0; i <= reg->tindex; ++i) {
- mp[i].in = (char **) malloc(sizeof *mp[i].in);
- mp[i].left = malloc(2);
- mp[i].right = malloc(2);
- mp[i].is = malloc(2);
- if (mp[i].in == NULL || mp[i].left == NULL ||
- mp[i].right == NULL || mp[i].is == NULL)
- goto done;
- mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
- mp[i].in[0] = NULL;
- }
- result = "";
+ must *musts;
+ must *mp;
+ char *result;
+ int ri;
+ int i;
+ int exact;
+ token t;
+ static must must0;
+ struct dfamust *dm;
+ static char empty_string[] = "";
+
+ result = empty_string;
+ exact = 0;
+ musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
+ if (musts == NULL)
+ return;
+ mp = musts;
+ for (i = 0; i <= dfa->tindex; ++i)
+ mp[i] = must0;
+ for (i = 0; i <= dfa->tindex; ++i)
+ {
+ mp[i].in = (char **) malloc(sizeof *mp[i].in);
+ mp[i].left = malloc(2);
+ mp[i].right = malloc(2);
+ mp[i].is = malloc(2);
+ if (mp[i].in == NULL || mp[i].left == NULL ||
+ mp[i].right == NULL || mp[i].is == NULL)
+ goto done;
+ mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
+ mp[i].in[0] = NULL;
+ }
#ifdef DEBUG
- fprintf(stderr, "regmust:\n");
- for (i = 0; i < reg->tindex; ++i) {
- fprintf(stderr, " %d:", i);
- prtok(reg->tokens[i]);
- }
- putc('\n', stderr);
+ fprintf(stderr, "dfamust:\n");
+ for (i = 0; i < dfa->tindex; ++i)
+ {
+ fprintf(stderr, " %d:", i);
+ prtok(dfa->tokens[i]);
+ }
+ putc('\n', stderr);
#endif
- for (ri = 0; ri < reg->tindex; ++ri) {
- switch (t = reg->tokens[ri]) {
- case _ALLBEGLINE:
- case _ALLENDLINE:
- case _LPAREN:
- case _RPAREN:
- goto done; /* "cannot happen" */
- case _EMPTY:
- case _BEGLINE:
- case _ENDLINE:
- case _BEGWORD:
- case _ENDWORD:
- case _LIMWORD:
- case _NOTLIMWORD:
- case _BACKREF:
- resetmust(mp);
- break;
- case _STAR:
- case _QMARK:
- if (mp <= musts)
- goto done; /* "cannot happen" */
- --mp;
- resetmust(mp);
- break;
- case _OR:
- if (mp < &musts[2])
- goto done; /* "cannot happen" */
- {
- register char ** new;
- register must * lmp;
- register must * rmp;
- register int j, ln, rn, n;
-
- rmp = --mp;
- lmp = --mp;
- /* Guaranteed to be. Unlikely, but. . . */
- if (strcmp(lmp->is, rmp->is) != 0)
- lmp->is[0] = '\0';
- /* Left side--easy */
- i = 0;
- while (lmp->left[i] != '\0' &&
- lmp->left[i] == rmp->left[i])
- ++i;
- lmp->left[i] = '\0';
- /* Right side */
- ln = strlen(lmp->right);
- rn = strlen(rmp->right);
- n = ln;
- if (n > rn)
- n = rn;
- for (i = 0; i < n; ++i)
- if (lmp->right[ln - i - 1] !=
- rmp->right[rn - i - 1])
- break;
- for (j = 0; j < i; ++j)
- lmp->right[j] =
- lmp->right[(ln - i) + j];
- lmp->right[j] = '\0';
- new = inboth(lmp->in, rmp->in);
- if (new == NULL)
- goto done;
- freelist(lmp->in);
- free((char *) lmp->in);
- lmp->in = new;
- }
- break;
- case _PLUS:
- if (mp <= musts)
- goto done; /* "cannot happen" */
- --mp;
- mp->is[0] = '\0';
- break;
- case _END:
- if (mp != &musts[1])
- goto done; /* "cannot happen" */
- for (i = 0; musts[0].in[i] != NULL; ++i)
- if (strlen(musts[0].in[i]) > strlen(result))
- result = musts[0].in[i];
- goto done;
- case _CAT:
- if (mp < &musts[2])
- goto done; /* "cannot happen" */
- {
- register must * lmp;
- register must * rmp;
-
- rmp = --mp;
- lmp = --mp;
- /*
- ** In. Everything in left, plus everything in
- ** right, plus catenation of
- ** left's right and right's left.
- */
- lmp->in = addlists(lmp->in, rmp->in);
- if (lmp->in == NULL)
- goto done;
- if (lmp->right[0] != '\0' &&
- rmp->left[0] != '\0') {
- register char * tp;
-
- tp = icpyalloc(lmp->right);
- if (tp == NULL)
- goto done;
- tp = icatalloc(tp, rmp->left);
- if (tp == NULL)
- goto done;
- lmp->in = enlist(lmp->in, tp,
- strlen(tp));
- free(tp);
- if (lmp->in == NULL)
- goto done;
- }
- /* Left-hand */
- if (lmp->is[0] != '\0') {
- lmp->left = icatalloc(lmp->left,
- rmp->left);
- if (lmp->left == NULL)
- goto done;
- }
- /* Right-hand */
- if (rmp->is[0] == '\0')
- lmp->right[0] = '\0';
- lmp->right = icatalloc(lmp->right, rmp->right);
- if (lmp->right == NULL)
- goto done;
- /* Guaranteed to be */
- if (lmp->is[0] != '\0' && rmp->is[0] != '\0') {
- lmp->is = icatalloc(lmp->is, rmp->is);
- if (lmp->is == NULL)
- goto done;
- } else
- lmp->is[0] = '\0';
- }
- break;
- default:
- if (t < _END) {
- /* "cannot happen" */
- goto done;
- } else if (t == '\0') {
- /* not on *my* shift */
- goto done;
- } else if (t >= _SET) {
- /* easy enough */
- resetmust(mp);
- } else {
- /* plain character */
- resetmust(mp);
- mp->is[0] = mp->left[0] = mp->right[0] = t;
- mp->is[1] = mp->left[1] = mp->right[1] = '\0';
- mp->in = enlist(mp->in, mp->is, 1);
- if (mp->in == NULL)
- goto done;
- }
- break;
- }
+ for (ri = 0; ri < dfa->tindex; ++ri)
+ {
+ switch (t = dfa->tokens[ri])
+ {
+ case LPAREN:
+ case RPAREN:
+ goto done; /* "cannot happen" */
+ case EMPTY:
+ case BEGLINE:
+ case ENDLINE:
+ case BEGWORD:
+ case ENDWORD:
+ case LIMWORD:
+ case NOTLIMWORD:
+ case BACKREF:
+ resetmust(mp);
+ break;
+ case STAR:
+ case QMARK:
+ if (mp <= musts)
+ goto done; /* "cannot happen" */
+ --mp;
+ resetmust(mp);
+ break;
+ case OR:
+ case ORTOP:
+ if (mp < &musts[2])
+ goto done; /* "cannot happen" */
+ {
+ char **new;
+ must *lmp;
+ must *rmp;
+ int j, ln, rn, n;
+
+ rmp = --mp;
+ lmp = --mp;
+ /* Guaranteed to be. Unlikely, but. . . */
+ if (strcmp(lmp->is, rmp->is) != 0)
+ lmp->is[0] = '\0';
+ /* Left side--easy */
+ i = 0;
+ while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
+ ++i;
+ lmp->left[i] = '\0';
+ /* Right side */
+ ln = strlen(lmp->right);
+ rn = strlen(rmp->right);
+ n = ln;
+ if (n > rn)
+ n = rn;
+ for (i = 0; i < n; ++i)
+ if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
+ break;
+ for (j = 0; j < i; ++j)
+ lmp->right[j] = lmp->right[(ln - i) + j];
+ lmp->right[j] = '\0';
+ new = inboth(lmp->in, rmp->in);
+ if (new == NULL)
+ goto done;
+ freelist(lmp->in);
+ free((char *) lmp->in);
+ lmp->in = new;
+ }
+ break;
+ case PLUS:
+ if (mp <= musts)
+ goto done; /* "cannot happen" */
+ --mp;
+ mp->is[0] = '\0';
+ break;
+ case END:
+ if (mp != &musts[1])
+ goto done; /* "cannot happen" */
+ for (i = 0; musts[0].in[i] != NULL; ++i)
+ if (strlen(musts[0].in[i]) > strlen(result))
+ result = musts[0].in[i];
+ if (strcmp(result, musts[0].is) == 0)
+ exact = 1;
+ goto done;
+ case CAT:
+ if (mp < &musts[2])
+ goto done; /* "cannot happen" */
+ {
+ must *lmp;
+ must *rmp;
+
+ rmp = --mp;
+ lmp = --mp;
+ /* In. Everything in left, plus everything in
+ right, plus catenation of
+ left's right and right's left. */
+ lmp->in = addlists(lmp->in, rmp->in);
+ if (lmp->in == NULL)
+ goto done;
+ if (lmp->right[0] != '\0' &&
+ rmp->left[0] != '\0')
+ {
+ char *tp;
+
+ tp = icpyalloc(lmp->right);
+ if (tp == NULL)
+ goto done;
+ tp = icatalloc(tp, rmp->left);
+ if (tp == NULL)
+ goto done;
+ lmp->in = enlist(lmp->in, tp,
+ strlen(tp));
+ free(tp);
+ if (lmp->in == NULL)
+ goto done;
+ }
+ /* Left-hand */
+ if (lmp->is[0] != '\0')
+ {
+ lmp->left = icatalloc(lmp->left,
+ rmp->left);
+ if (lmp->left == NULL)
+ goto done;
+ }
+ /* Right-hand */
+ if (rmp->is[0] == '\0')
+ lmp->right[0] = '\0';
+ lmp->right = icatalloc(lmp->right, rmp->right);
+ if (lmp->right == NULL)
+ goto done;
+ /* Guaranteed to be */
+ if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
+ {
+ lmp->is = icatalloc(lmp->is, rmp->is);
+ if (lmp->is == NULL)
+ goto done;
+ }
+ else
+ lmp->is[0] = '\0';
+ }
+ break;
+ default:
+ if (t < END)
+ {
+ /* "cannot happen" */
+ goto done;
+ }
+ else if (t == '\0')
+ {
+ /* not on *my* shift */
+ goto done;
+ }
+ else if (t >= CSET)
+ {
+ /* easy enough */
+ resetmust(mp);
+ }
+ else
+ {
+ /* plain character */
+ resetmust(mp);
+ mp->is[0] = mp->left[0] = mp->right[0] = t;
+ mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+ mp->in = enlist(mp->in, mp->is, (size_t)1);
+ if (mp->in == NULL)
+ goto done;
+ }
+ break;
+ }
#ifdef DEBUG
- fprintf(stderr, " node: %d:", ri);
- prtok(reg->tokens[ri]);
- fprintf(stderr, "\n in:");
- for (i = 0; mp->in[i]; ++i)
- fprintf(stderr, " \"%s\"", mp->in[i]);
- fprintf(stderr, "\n is: \"%s\"\n", mp->is);
- fprintf(stderr, " left: \"%s\"\n", mp->left);
- fprintf(stderr, " right: \"%s\"\n", mp->right);
+ fprintf(stderr, " node: %d:", ri);
+ prtok(dfa->tokens[ri]);
+ fprintf(stderr, "\n in:");
+ for (i = 0; mp->in[i]; ++i)
+ fprintf(stderr, " \"%s\"", mp->in[i]);
+ fprintf(stderr, "\n is: \"%s\"\n", mp->is);
+ fprintf(stderr, " left: \"%s\"\n", mp->left);
+ fprintf(stderr, " right: \"%s\"\n", mp->right);
#endif
- ++mp;
- }
-done:
- (void) strncpy(reg->must, result, MUST_MAX - 1);
- reg->must[MUST_MAX - 1] = '\0';
- reg->mustn = strlen(reg->must);
- mp = musts;
- for (i = 0; i <= reg->tindex; ++i) {
- freelist(mp[i].in);
- ifree((char *) mp[i].in);
- ifree(mp[i].left);
- ifree(mp[i].right);
- ifree(mp[i].is);
- }
- free((char *) mp);
+ ++mp;
+ }
+ done:
+ if (strlen(result))
+ {
+ dm = (struct dfamust *) malloc(sizeof (struct dfamust));
+ dm->exact = exact;
+ dm->must = malloc(strlen(result) + 1);
+ strcpy(dm->must, result);
+ dm->next = dfa->musts;
+ dfa->musts = dm;
+ }
+ mp = musts;
+ for (i = 0; i <= dfa->tindex; ++i)
+ {
+ freelist(mp[i].in);
+ ifree((char *) mp[i].in);
+ ifree(mp[i].left);
+ ifree(mp[i].right);
+ ifree(mp[i].is);
+ }
+ free((char *) mp);
}
diff --git a/gnu/usr.bin/grep/dfa.h b/gnu/usr.bin/grep/dfa.h
index 33e4bf1..95b5d89 100644
--- a/gnu/usr.bin/grep/dfa.h
+++ b/gnu/usr.bin/grep/dfa.h
@@ -1,5 +1,5 @@
/* dfa.h - declarations for GNU deterministic regexp compiler
- Copyright (C) 1988 Free Software Foundation, Inc.
+ Copyright (C) 1988, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,213 +13,137 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
/* Written June, 1988 by Mike Haertel */
-
-#ifdef STDC_HEADERS
-#include <stddef.h>
-#include <stdlib.h>
+/* FIXME:
+ 2. We should not export so much of the DFA internals.
+ In addition to clobbering modularity, we eat up valuable
+ name space. */
-#else /* !STDC_HEADERS */
-
-#define const
-#include <sys/types.h> /* For size_t. */
-extern char *calloc(), *malloc(), *realloc();
-extern void free();
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-#endif /* ! STDC_HEADERS */
-
-#include <ctype.h>
-#ifndef isascii
-#define ISALNUM(c) isalnum(c)
-#define ISALPHA(c) isalpha(c)
-#define ISUPPER(c) isupper(c)
-#define ISLOWER(c) islower(c)
+# undef PARAMS
+#if __STDC__
+# ifndef _PTR_T
+# define _PTR_T
+ typedef void * ptr_t;
+# endif
+# define PARAMS(x) x
#else
-#define ISALNUM(c) (isascii(c) && isalnum(c))
-#define ISALPHA(c) (isascii(c) && isalpha(c))
-#define ISUPPER(c) (isascii(c) && isupper(c))
-#define ISLOWER(c) (isascii(c) && islower(c))
+# ifndef _PTR_T
+# define _PTR_T
+ typedef char * ptr_t;
+# endif
+# define PARAMS(x) ()
#endif
-/* 1 means plain parentheses serve as grouping, and backslash
- parentheses are needed for literal searching.
- 0 means backslash-parentheses are grouping, and plain parentheses
- are for literal searching. */
-#define RE_NO_BK_PARENS 1
-
-/* 1 means plain | serves as the "or"-operator, and \| is a literal.
- 0 means \| serves as the "or"-operator, and | is a literal. */
-#define RE_NO_BK_VBAR 2
-
-/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
- 1 means \+, \? are operators and plain +, ? are literals. */
-#define RE_BK_PLUS_QM 4
-
-/* 1 means | binds tighter than ^ or $.
- 0 means the contrary. */
-#define RE_TIGHT_VBAR 8
-
-/* 1 means treat \n as an _OR operator
- 0 means treat it as a normal character */
-#define RE_NEWLINE_OR 16
-
-/* 0 means that a special characters (such as *, ^, and $) always have
- their special meaning regardless of the surrounding context.
- 1 means that special characters may act as normal characters in some
- contexts. Specifically, this applies to:
- ^ - only special at the beginning, or after ( or |
- $ - only special at the end, or before ) or |
- *, +, ? - only special when not after the beginning, (, or | */
-#define RE_CONTEXT_INDEP_OPS 32
-
-/* Now define combinations of bits for the standard possibilities. */
-#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
-#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
-#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
-#define RE_SYNTAX_EMACS 0
-
/* Number of bits in an unsigned char. */
+#ifndef CHARBITS
#define CHARBITS 8
+#endif
/* First integer value that is greater than any character code. */
-#define _NOTCHAR (1 << CHARBITS)
+#define NOTCHAR (1 << CHARBITS)
/* INTBITS need not be exact, just a lower bound. */
+#ifndef INTBITS
#define INTBITS (CHARBITS * sizeof (int))
+#endif
/* Number of ints required to hold a bit for every character. */
-#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS)
+#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
-typedef int _charset[_CHARSET_INTS];
+typedef int charclass[CHARCLASS_INTS];
/* The regexp is parsed into an array of tokens in postfix form. Some tokens
are operators and others are terminal symbols. Most (but not all) of these
codes are returned by the lexical analyzer. */
-#if __STDC__
typedef enum
{
- _END = -1, /* _END is a terminal symbol that matches the
- end of input; any value of _END or less in
+ END = -1, /* END is a terminal symbol that matches the
+ end of input; any value of END or less in
the parse tree is such a symbol. Accepting
states of the DFA are those that would have
- a transition on _END. */
+ a transition on END. */
/* Ordinary character values are terminal symbols that match themselves. */
- _EMPTY = _NOTCHAR, /* _EMPTY is a terminal symbol that matches
+ EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
the empty string. */
- _BACKREF, /* _BACKREF is generated by \<digit>; it
+ BACKREF, /* BACKREF is generated by \<digit>; it
it not completely handled. If the scanner
detects a transition on backref, it returns
a kind of "semi-success" indicating that
the match will have to be verified with
a backtracking matcher. */
- _BEGLINE, /* _BEGLINE is a terminal symbol that matches
+ BEGLINE, /* BEGLINE is a terminal symbol that matches
the empty string if it is at the beginning
of a line. */
- _ALLBEGLINE, /* _ALLBEGLINE is a terminal symbol that
- matches the empty string if it is at the
- beginning of a line; _ALLBEGLINE applies
- to the entire regexp and can only occur
- as the first token thereof. _ALLBEGLINE
- never appears in the parse tree; a _BEGLINE
- is prepended with _CAT to the entire
- regexp instead. */
-
- _ENDLINE, /* _ENDLINE is a terminal symbol that matches
+ ENDLINE, /* ENDLINE is a terminal symbol that matches
the empty string if it is at the end of
a line. */
- _ALLENDLINE, /* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE
- is to _BEGLINE. */
-
- _BEGWORD, /* _BEGWORD is a terminal symbol that matches
+ BEGWORD, /* BEGWORD is a terminal symbol that matches
the empty string if it is at the beginning
of a word. */
- _ENDWORD, /* _ENDWORD is a terminal symbol that matches
+ ENDWORD, /* ENDWORD is a terminal symbol that matches
the empty string if it is at the end of
a word. */
- _LIMWORD, /* _LIMWORD is a terminal symbol that matches
+ LIMWORD, /* LIMWORD is a terminal symbol that matches
the empty string if it is at the beginning
or the end of a word. */
- _NOTLIMWORD, /* _NOTLIMWORD is a terminal symbol that
+ NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
matches the empty string if it is not at
the beginning or end of a word. */
- _QMARK, /* _QMARK is an operator of one argument that
+ QMARK, /* QMARK is an operator of one argument that
matches zero or one occurences of its
argument. */
- _STAR, /* _STAR is an operator of one argument that
+ STAR, /* STAR is an operator of one argument that
matches the Kleene closure (zero or more
occurrences) of its argument. */
- _PLUS, /* _PLUS is an operator of one argument that
+ PLUS, /* PLUS is an operator of one argument that
matches the positive closure (one or more
occurrences) of its argument. */
- _CAT, /* _CAT is an operator of two arguments that
+ REPMN, /* REPMN is a lexical token corresponding
+ to the {m,n} construct. REPMN never
+ appears in the compiled token vector. */
+
+ CAT, /* CAT is an operator of two arguments that
matches the concatenation of its
- arguments. _CAT is never returned by the
+ arguments. CAT is never returned by the
lexical analyzer. */
- _OR, /* _OR is an operator of two arguments that
+ OR, /* OR is an operator of two arguments that
matches either of its arguments. */
- _LPAREN, /* _LPAREN never appears in the parse tree,
+ ORTOP, /* OR at the toplevel in the parse tree.
+ This is used for a boyer-moore heuristic. */
+
+ LPAREN, /* LPAREN never appears in the parse tree,
it is only a lexeme. */
- _RPAREN, /* _RPAREN never appears in the parse tree. */
+ RPAREN, /* RPAREN never appears in the parse tree. */
- _SET /* _SET and (and any value greater) is a
+ CSET /* CSET (and any value greater) is a
terminal symbol that matches any of a
class of characters. */
-} _token;
-
-#else /* ! __STDC__ */
-
-typedef short _token;
-
-#define _END -1
-#define _EMPTY _NOTCHAR
-#define _BACKREF (_EMPTY + 1)
-#define _BEGLINE (_EMPTY + 2)
-#define _ALLBEGLINE (_EMPTY + 3)
-#define _ENDLINE (_EMPTY + 4)
-#define _ALLENDLINE (_EMPTY + 5)
-#define _BEGWORD (_EMPTY + 6)
-#define _ENDWORD (_EMPTY + 7)
-#define _LIMWORD (_EMPTY + 8)
-#define _NOTLIMWORD (_EMPTY + 9)
-#define _QMARK (_EMPTY + 10)
-#define _STAR (_EMPTY + 11)
-#define _PLUS (_EMPTY + 12)
-#define _CAT (_EMPTY + 13)
-#define _OR (_EMPTY + 14)
-#define _LPAREN (_EMPTY + 15)
-#define _RPAREN (_EMPTY + 16)
-#define _SET (_EMPTY + 17)
-
-#endif /* ! __STDC__ */
-
-/* Sets are stored in an array in the compiled regexp; the index of the
- array corresponding to a given set token is given by _SET_INDEX(t). */
-#define _SET_INDEX(t) ((t) - _SET)
+} token;
+
+/* Sets are stored in an array in the compiled dfa; the index of the
+ array corresponding to a given set token is given by SET_INDEX(t). */
+#define SET_INDEX(t) ((t) - CSET)
/* Sometimes characters can only be matched depending on the surrounding
context. Such context decisions depend on what the previous character
@@ -239,36 +163,36 @@ typedef short _token;
Word-constituent characters are those that satisfy isalnum().
- The macro _SUCCEEDS_IN_CONTEXT determines whether a a given constraint
+ The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
succeeds in a particular context. Prevn is true if the previous character
was a newline, currn is true if the lookahead character is a newline.
Prevl and currl similarly depend upon whether the previous and current
characters are word-constituent letters. */
-#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
- ((constraint) & 1 << ((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)
-#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
- ((constraint) & 1 << ((prevl) ? 2 : 0) + ((currl) ? 1 : 0))
-#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
- (_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
- && _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
+#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
+#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
+ ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
+#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
+ (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ && MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
/* The following macros give information about what a constraint depends on. */
-#define _PREV_NEWLINE_DEPENDENT(constraint) \
+#define PREV_NEWLINE_DEPENDENT(constraint) \
(((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
-#define _PREV_LETTER_DEPENDENT(constraint) \
+#define PREV_LETTER_DEPENDENT(constraint) \
(((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
/* Tokens that match the empty string subject to some constraint actually
work by applying that constraint to determine what may follow them,
taking into account what has gone before. The following values are
the constraints corresponding to the special tokens previously defined. */
-#define _NO_CONSTRAINT 0xff
-#define _BEGLINE_CONSTRAINT 0xcf
-#define _ENDLINE_CONSTRAINT 0xaf
-#define _BEGWORD_CONSTRAINT 0xf2
-#define _ENDWORD_CONSTRAINT 0xf4
-#define _LIMWORD_CONSTRAINT 0xf6
-#define _NOTLIMWORD_CONSTRAINT 0xf9
+#define NO_CONSTRAINT 0xff
+#define BEGLINE_CONSTRAINT 0xcf
+#define ENDLINE_CONSTRAINT 0xaf
+#define BEGWORD_CONSTRAINT 0xf2
+#define ENDWORD_CONSTRAINT 0xf4
+#define LIMWORD_CONSTRAINT 0xf6
+#define NOTLIMWORD_CONSTRAINT 0xf9
/* States of the recognizer correspond to sets of positions in the parse
tree, together with the constraints under which they may be matched.
@@ -278,44 +202,48 @@ typedef struct
{
unsigned index; /* Index into the parse array. */
unsigned constraint; /* Constraint for matching this position. */
-} _position;
+} position;
/* Sets of positions are stored as arrays. */
typedef struct
{
- _position *elems; /* Elements of this position set. */
+ position *elems; /* Elements of this position set. */
int nelem; /* Number of elements in this set. */
-} _position_set;
+} position_set;
-/* A state of the regexp consists of a set of positions, some flags,
+/* A state of the dfa consists of a set of positions, some flags,
and the token value of the lowest-numbered position of the state that
- contains an _END token. */
+ contains an END token. */
typedef struct
{
int hash; /* Hash of the positions of this state. */
- _position_set elems; /* Positions this state could match. */
+ position_set elems; /* Positions this state could match. */
char newline; /* True if previous state matched newline. */
char letter; /* True if previous state matched a letter. */
char backref; /* True if this state matches a \<digit>. */
unsigned char constraint; /* Constraint for this state to accept. */
- int first_end; /* Token value of the first _END in elems. */
-} _dfa_state;
+ int first_end; /* Token value of the first END in elems. */
+} dfa_state;
-/* If an r.e. is at most MUST_MAX characters long, we look for a string which
- must appear in it; whatever's found is dropped into the struct reg. */
-
-#define MUST_MAX 50
+/* Element of a list of strings, at least one of which is known to
+ appear in any R.E. matching the DFA. */
+struct dfamust
+{
+ int exact;
+ char *must;
+ struct dfamust *next;
+};
/* A compiled regular expression. */
-struct regexp
+struct dfa
{
/* Stuff built by the scanner. */
- _charset *charsets; /* Array of character sets for _SET tokens. */
- int cindex; /* Index for adding new charsets. */
- int calloc; /* Number of charsets currently allocated. */
+ charclass *charclasses; /* Array of character sets for CSET tokens. */
+ int cindex; /* Index for adding new charclasses. */
+ int calloc; /* Number of charclasses currently allocated. */
/* Stuff built by the parser. */
- _token *tokens; /* Postfix parse array. */
+ token *tokens; /* Postfix parse array. */
int tindex; /* Index for adding new tokens. */
int talloc; /* Number of tokens currently allocated. */
int depth; /* Depth required of an evaluation stack
@@ -323,15 +251,15 @@ struct regexp
parse tree. */
int nleaves; /* Number of leaves on the parse tree. */
int nregexps; /* Count of parallel regexps being built
- with regparse(). */
+ with dfaparse(). */
/* Stuff owned by the state builder. */
- _dfa_state *states; /* States of the regexp. */
+ dfa_state *states; /* States of the dfa. */
int sindex; /* Index for adding new states. */
int salloc; /* Number of states currently allocated. */
/* Stuff built by the structure analyzer. */
- _position_set *follows; /* Array of follow sets, indexed by position
+ position_set *follows; /* Array of follow sets, indexed by position
index. The follow of a position is the set
of positions containing characters that
could conceivably follow a character
@@ -361,7 +289,7 @@ struct regexp
int **fails; /* Transition tables after failing to accept
on a state that potentially could do so. */
int *success; /* Table of acceptance conditions used in
- regexecute and computed in build_state. */
+ dfaexec and computed in build_state. */
int *newlines; /* Transitions on newlines. The entry for a
newline in any transition table is always
-1 so we can count lines without wasting
@@ -369,40 +297,39 @@ struct regexp
newline is stored separately and handled
as a special case. Newline is also used
as a sentinel at the end of the buffer. */
- char must[MUST_MAX];
- int mustn;
+ struct dfamust *musts; /* List of strings, at least one of which
+ is known to appear in any r.e. matching
+ the dfa. */
};
-/* Some macros for user access to regexp internals. */
+/* Some macros for user access to dfa internals. */
/* ACCEPTING returns true if s could possibly be an accepting state of r. */
#define ACCEPTING(s, r) ((r).states[s].constraint)
/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
specified context. */
-#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \
- _SUCCEEDS_IN_CONTEXT((reg).states[state].constraint, \
+#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
+ SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \
prevn, currn, prevl, currl)
/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
regexps that a given state could accept. Parallel regexps are numbered
starting at 1. */
-#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end)
+#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)
/* Entry points. */
-#if __STDC__
-
-/* Regsyntax() takes two arguments; the first sets the syntax bits described
+/* dfasyntax() takes two arguments; the first sets the syntax bits described
earlier in this file, and the second sets the case-folding flag. */
-extern void regsyntax(int, int);
+extern void dfasyntax PARAMS ((reg_syntax_t, int));
-/* Compile the given string of the given length into the given struct regexp.
+/* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an
exact matcher. */
-extern void regcompile(const char *, size_t, struct regexp *, int);
+extern void dfacomp PARAMS ((char *, size_t, struct dfa *, int));
-/* Execute the given struct regexp on the buffer of characters. The
+/* Execute the given struct dfa on the buffer of characters. The
first char * points to the beginning, and the second points to the
first character after the end of the buffer, which must be a writable
place so a sentinel end-of-buffer marker can be stored there. The
@@ -414,37 +341,31 @@ extern void regcompile(const char *, size_t, struct regexp *, int);
order to verify backreferencing; otherwise the flag will be cleared.
Returns NULL if no match is found, or a pointer to the first
character after the first & shortest matching string in the buffer. */
-extern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
+extern char *dfaexec PARAMS ((struct dfa *, char *, char *, int, int *, int *));
-/* Free the storage held by the components of a struct regexp. */
-extern void regfree(struct regexp *);
+/* Free the storage held by the components of a struct dfa. */
+extern void dfafree PARAMS ((struct dfa *));
/* Entry points for people who know what they're doing. */
-/* Initialize the components of a struct regexp. */
-extern void reginit(struct regexp *);
+/* Initialize the components of a struct dfa. */
+extern void dfainit PARAMS ((struct dfa *));
-/* Incrementally parse a string of given length into a struct regexp. */
-extern void regparse(const char *, size_t, struct regexp *);
+/* Incrementally parse a string of given length into a struct dfa. */
+extern void dfaparse PARAMS ((char *, size_t, struct dfa *));
/* Analyze a parsed regexp; second argument tells whether to build a searching
or an exact matcher. */
-extern void reganalyze(struct regexp *, int);
+extern void dfaanalyze PARAMS ((struct dfa *, int));
/* Compute, for each possible character, the transitions out of a given
state, storing them in an array of integers. */
-extern void regstate(int, struct regexp *, int []);
+extern void dfastate PARAMS ((int, struct dfa *, int []));
/* Error handling. */
-/* Regerror() is called by the regexp routines whenever an error occurs. It
+/* dfaerror() is called by the regexp routines whenever an error occurs. It
takes a single argument, a NUL-terminated string describing the error.
- The default regerror() prints the error message to stderr and exits.
- The user can provide a different regfree() if so desired. */
-extern void regerror(const char *);
-
-#else /* ! __STDC__ */
-extern void regsyntax(), regcompile(), regfree(), reginit(), regparse();
-extern void reganalyze(), regstate(), regerror();
-extern char *regexecute();
-#endif /* ! __STDC__ */
+ The default dfaerror() prints the error message to stderr and exits.
+ The user can provide a different dfaerror() if so desired. */
+extern void dfaerror PARAMS ((const char *));
diff --git a/gnu/usr.bin/grep/getopt.c b/gnu/usr.bin/grep/getopt.c
index 0661bdf..eac576b 100644
--- a/gnu/usr.bin/grep/getopt.c
+++ b/gnu/usr.bin/grep/getopt.c
@@ -3,63 +3,98 @@
"Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
before changing it!
- Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc.
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97
+ Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
- This program is distributed in the hope that it will be useful,
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
-/* AIX requires this to be the first thing in the file. */
-#ifdef __GNUC__
-#define alloca __builtin_alloca
-#else /* not __GNUC__ */
-#if defined(sparc) && !defined(USG) && !defined(SVR4) && !defined(__svr4__)
-#include <alloca.h>
-#else
-#ifdef _AIX
- #pragma alloca
-#else
-char *alloca ();
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+#define _NO_PROTO
#endif
-#endif /* sparc */
-#endif /* not __GNUC__ */
-#ifdef LIBC
-/* For when compiled as part of the GNU C library. */
-#include <ansidecl.h>
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#if !defined (__STDC__) || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
#endif
#include <stdio.h>
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
/* This needs to come after some library #include
to get __GNU_LIBRARY__ defined. */
#ifdef __GNU_LIBRARY__
-#undef alloca
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
#include <stdlib.h>
-#include <string.h>
-#else /* Not GNU C library. */
-#define __alloca alloca
+#include <unistd.h>
#endif /* GNU C library. */
+#ifdef VMS
+#include <unixlib.h>
+#if HAVE_STRING_H - 0
+#include <string.h>
+#endif
+#endif
-#ifndef __STDC__
-#define const
+#if defined (WIN32) && !defined (__CYGWIN32__)
+/* It's not Unix, really. See? Capital letters. */
+#include <windows.h>
+#define getpid() GetCurrentProcessId()
#endif
-/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
- long-named option. Because this is not POSIX.2 compliant, it is
- being phased out. */
-#define GETOPT_COMPAT
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.
+ When compiling libc, the _ macro is predefined. */
+#ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+#else
+# define _(msgid) (msgid)
+#endif
+#endif
/* This version of `getopt' appears to the caller like standard Unix `getopt'
but it behaves differently for the user, since it allows the user
@@ -83,7 +118,7 @@ char *alloca ();
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
-char *optarg = 0;
+char *optarg = NULL;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
@@ -91,13 +126,20 @@ char *optarg = 0;
On entry to `getopt', zero means this is the first call; initialize.
- When `getopt' returns EOF, this is the index of the first of the
+ When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
-int optind = 0;
+/* 1003.2 says this must be 1 before any call. */
+int optind = 1;
+
+/* Formerly, initialization of getopt depended on optind==0, which
+ causes problems with re-calling getopt as programs generally don't
+ know that. */
+
+int __getopt_initialized = 0;
/* The next char to be scanned in the option-element
in which the last option character we returned was found.
@@ -113,6 +155,12 @@ static char *nextchar;
int opterr = 1;
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
/* Describe how to deal with options that follow non-option ARGV-elements.
If the caller did not specify anything,
@@ -140,17 +188,23 @@ int opterr = 1;
The special argument `--' forces an end of option-scanning regardless
of the value of `ordering'. In the case of RETURN_IN_ORDER, only
- `--' can cause `getopt' to return EOF with `optind' != ARGC. */
+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
static enum
{
REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable. */
+static char *posixly_correct;
#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+ because there are many ways it can cause trouble.
+ On some systems, it contains special magic macros that don't work
+ in GCC. */
#include <string.h>
#define my_index strchr
-#define my_bcopy(src, dst, n) memcpy ((dst), (src), (n))
#else
/* Avoid depending on library functions or files
@@ -159,29 +213,32 @@ static enum
char *getenv ();
static char *
-my_index (string, chr)
- char *string;
+my_index (str, chr)
+ const char *str;
int chr;
{
- while (*string)
+ while (*str)
{
- if (*string == chr)
- return string;
- string++;
+ if (*str == chr)
+ return (char *) str;
+ str++;
}
return 0;
}
-static void
-my_bcopy (from, to, size)
- char *from, *to;
- int size;
-{
- int i;
- for (i = 0; i < size; i++)
- to[i] = from[i];
-}
-#endif /* GNU C library. */
+/* If using GCC, we can safely declare strlen this way.
+ If not using GCC, it is ok not to declare it. */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+ That was relevant to code that was here before. */
+#if !defined (__STDC__) || !__STDC__
+/* gcc with -traditional declares the built-in strlen to return int,
+ and has done so at least since version 2.4.5. -- rms. */
+extern int strlen (const char *);
+#endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
/* Handle permutation of arguments. */
@@ -192,6 +249,31 @@ my_bcopy (from, to, size)
static int first_nonopt;
static int last_nonopt;
+#ifdef _LIBC
+/* Bash 2.0 gives us an environment variable containing flags
+ indicating ARGV elements that should not be considered arguments. */
+
+static const char *nonoption_flags;
+static int nonoption_flags_len;
+
+static int original_argc;
+static char *const *original_argv;
+
+/* To make sure the environment variable bash 2.0 puts in the environment
+ is valid for the getopt call, we must make sure that the ARGV passed
+ to getopt is the one passed to the process. */
+static void store_args (int argc, char *const *argv) __attribute__ ((unused));
+static void
+store_args (int argc, char *const *argv)
+{
+ /* XXX This is not a good solution. We should rather copy the args so
+ that we can compare them later. But we must not use malloc(3). */
+ original_argc = argc;
+ original_argv = argv;
+}
+text_set_element (__libc_subinit, store_args);
+#endif
+
/* Exchange two adjacent subsequences of ARGV.
One subsequence is elements [first_nonopt,last_nonopt)
which contains all the non-options that have been skipped so far.
@@ -201,25 +283,126 @@ static int last_nonopt;
`first_nonopt' and `last_nonopt' are relocated so that they describe
the new indices of the non-options in ARGV after they are moved. */
+#if defined (__STDC__) && __STDC__
+static void exchange (char **);
+#endif
+
static void
exchange (argv)
char **argv;
{
- int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *);
- char **temp = (char **) __alloca (nonopts_size);
+ int bottom = first_nonopt;
+ int middle = last_nonopt;
+ int top = optind;
+ char *tem;
+
+ /* Exchange the shorter segment with the far end of the longer segment.
+ That puts the shorter segment into the right place.
+ It leaves the longer segment in the right place overall,
+ but it consists of two parts that need to be swapped next. */
- /* Interchange the two blocks of data in ARGV. */
+ while (top > middle && middle > bottom)
+ {
+ if (top - middle > middle - bottom)
+ {
+ /* Bottom segment is the short one. */
+ int len = middle - bottom;
+ register int i;
+
+ /* Swap it with the top part of the top segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[top - (middle - bottom) + i];
+ argv[top - (middle - bottom) + i] = tem;
+ }
+ /* Exclude the moved bottom segment from further swapping. */
+ top -= len;
+ }
+ else
+ {
+ /* Top segment is the short one. */
+ int len = top - middle;
+ register int i;
- my_bcopy (&argv[first_nonopt], temp, nonopts_size);
- my_bcopy (&argv[last_nonopt], &argv[first_nonopt],
- (optind - last_nonopt) * sizeof (char *));
- my_bcopy (temp, &argv[first_nonopt + optind - last_nonopt], nonopts_size);
+ /* Swap it with the bottom part of the bottom segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[middle + i];
+ argv[middle + i] = tem;
+ }
+ /* Exclude the moved top segment from further swapping. */
+ bottom += len;
+ }
+ }
/* Update records for the slots the non-options now occupy. */
first_nonopt += (optind - last_nonopt);
last_nonopt = optind;
}
+
+/* Initialize the internal data when the first call is made. */
+
+#if defined (__STDC__) && __STDC__
+static const char *_getopt_initialize (int, char *const *, const char *);
+#endif
+static const char *
+_getopt_initialize (argc, argv, optstring)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+{
+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ posixly_correct = getenv ("POSIXLY_CORRECT");
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (posixly_correct != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+
+#ifdef _LIBC
+ if (posixly_correct == NULL
+ && argc == original_argc && argv == original_argv)
+ {
+ /* Bash 2.0 puts a special variable in the environment for each
+ command it runs, specifying which ARGV elements are the results of
+ file name wildcard expansion and therefore should not be
+ considered as options. */
+ char var[100];
+ sprintf (var, "_%d_GNU_nonoption_argv_flags_", getpid ());
+ nonoption_flags = getenv (var);
+ if (nonoption_flags == NULL)
+ nonoption_flags_len = 0;
+ else
+ nonoption_flags_len = strlen (nonoption_flags);
+ }
+ else
+ nonoption_flags_len = 0;
+#endif
+
+ return optstring;
+}
/* Scan elements of ARGV (whose length is ARGC) for option characters
given in OPTSTRING.
@@ -234,7 +417,7 @@ exchange (argv)
updating `optind' and `nextchar' so that the next call to `getopt' can
resume the scan with the following option character or ARGV-element.
- If there are no more option characters, `getopt' returns `EOF'.
+ If there are no more option characters, `getopt' returns -1.
Then `optind' is the index in ARGV of the first ARGV-element
that is not an option. (The ARGV-elements have been permuted
so that those that are not options now come last.)
@@ -286,41 +469,38 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
int *longind;
int long_only;
{
- int option_index;
-
- optarg = 0;
-
- /* Initialize the internal data when the first call is made.
- Start processing options with ARGV-element 1 (since ARGV-element 0
- is the program name); the sequence of previously skipped
- non-option ARGV-elements is empty. */
+ optarg = NULL;
- if (optind == 0)
+ if (!__getopt_initialized || optind == 0)
{
- first_nonopt = last_nonopt = optind = 1;
-
- nextchar = NULL;
-
- /* Determine how to handle the ordering of options and nonoptions. */
-
- if (optstring[0] == '-')
- {
- ordering = RETURN_IN_ORDER;
- ++optstring;
- }
- else if (optstring[0] == '+')
- {
- ordering = REQUIRE_ORDER;
- ++optstring;
- }
- else if (getenv ("POSIXLY_CORRECT") != NULL)
- ordering = REQUIRE_ORDER;
- else
- ordering = PERMUTE;
+ optstring = _getopt_initialize (argc, argv, optstring);
+ optind = 1; /* Don't scan ARGV[0], the program name. */
+ __getopt_initialized = 1;
}
+ /* Test whether ARGV[optind] points to a non-option argument.
+ Either it does not have option syntax, or there is an environment flag
+ from the shell indicating it is not an option. The latter information
+ is only used when compiled as part of the GNU libc. */
+#ifdef _LIBC
+#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \
+ || (optind < nonoption_flags_len \
+ && nonoption_flags[optind] == '1'))
+#else
+#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#endif
+
if (nextchar == NULL || *nextchar == '\0')
{
+ /* Advance to the next ARGV-element. */
+
+ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+ moved back by the user (who may also have changed the arguments). */
+ if (last_nonopt > optind)
+ last_nonopt = optind;
+ if (first_nonopt > optind)
+ first_nonopt = optind;
+
if (ordering == PERMUTE)
{
/* If we have just processed some options following some non-options,
@@ -331,21 +511,15 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
else if (last_nonopt != optind)
first_nonopt = optind;
- /* Now skip any additional non-options
+ /* Skip any additional non-options
and extend the range of non-options previously skipped. */
- while (optind < argc
- && (argv[optind][0] != '-' || argv[optind][1] == '\0')
-#ifdef GETOPT_COMPAT
- && (longopts == NULL
- || argv[optind][0] != '+' || argv[optind][1] == '\0')
-#endif /* GETOPT_COMPAT */
- )
+ while (optind < argc && NONOPTION_P)
optind++;
last_nonopt = optind;
}
- /* Special ARGV-element `--' means premature end of options.
+ /* The special ARGV-element `--' means premature end of options.
Skip it like a null option,
then exchange with previous non-options as if it were an option,
then skip everything else like a non-option. */
@@ -372,56 +546,64 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
that we previously skipped, so the caller will digest them. */
if (first_nonopt != last_nonopt)
optind = first_nonopt;
- return EOF;
+ return -1;
}
/* If we have come to a non-option and did not permute it,
either stop the scan or describe it to the caller and pass it by. */
- if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
-#ifdef GETOPT_COMPAT
- && (longopts == NULL
- || argv[optind][0] != '+' || argv[optind][1] == '\0')
-#endif /* GETOPT_COMPAT */
- )
+ if (NONOPTION_P)
{
if (ordering == REQUIRE_ORDER)
- return EOF;
+ return -1;
optarg = argv[optind++];
return 1;
}
/* We have found another option-ARGV-element.
- Start decoding its characters. */
+ Skip the initial punctuation. */
nextchar = (argv[optind] + 1
+ (longopts != NULL && argv[optind][1] == '-'));
}
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
if (longopts != NULL
- && ((argv[optind][0] == '-'
- && (argv[optind][1] == '-' || long_only))
-#ifdef GETOPT_COMPAT
- || argv[optind][0] == '+'
-#endif /* GETOPT_COMPAT */
- ))
+ && (argv[optind][1] == '-'
+ || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
{
+ char *nameend;
const struct option *p;
- char *s = nextchar;
+ const struct option *pfound = NULL;
int exact = 0;
int ambig = 0;
- const struct option *pfound = NULL;
- int indfound;
+ int indfound = -1;
+ int option_index;
- while (*s && *s != '=')
- s++;
+ for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
- /* Test all options for either exact match or abbreviated matches. */
- for (p = longopts, option_index = 0; p->name;
- p++, option_index++)
- if (!strncmp (p->name, nextchar, s - nextchar))
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, nextchar, nameend - nextchar))
{
- if (s - nextchar == strlen (p->name))
+ if ((unsigned int) (nameend - nextchar)
+ == (unsigned int) strlen (p->name))
{
/* Exact match found. */
pfound = p;
@@ -436,17 +618,18 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
indfound = option_index;
}
else
- /* Second nonexact match found. */
+ /* Second or later nonexact match found. */
ambig = 1;
}
if (ambig && !exact)
{
if (opterr)
- fprintf (stderr, "%s: option `%s' is ambiguous\n",
+ fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
argv[0], argv[optind]);
nextchar += strlen (nextchar);
optind++;
+ optopt = 0;
return '?';
}
@@ -454,28 +637,29 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
{
option_index = indfound;
optind++;
- if (*s)
+ if (*nameend)
{
/* Don't test has_arg with >, because some C compilers don't
- allow it to be used on enums. */
+ allow it to be used on enums. */
if (pfound->has_arg)
- optarg = s + 1;
+ optarg = nameend + 1;
else
{
if (opterr)
- {
- if (argv[optind - 1][1] == '-')
- /* --option */
- fprintf (stderr,
- "%s: option `--%s' doesn't allow an argument\n",
- argv[0], pfound->name);
- else
- /* +option or -option */
- fprintf (stderr,
- "%s: option `%c%s' doesn't allow an argument\n",
- argv[0], argv[optind - 1][0], pfound->name);
- }
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ _("%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ _("%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[optind - 1][0], pfound->name);
+
nextchar += strlen (nextchar);
+
+ optopt = pfound->val;
return '?';
}
}
@@ -486,10 +670,12 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
else
{
if (opterr)
- fprintf (stderr, "%s: option `%s' requires an argument\n",
- argv[0], argv[optind - 1]);
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[optind - 1]);
nextchar += strlen (nextchar);
- return '?';
+ optopt = pfound->val;
+ return optstring[0] == ':' ? ':' : '?';
}
}
nextchar += strlen (nextchar);
@@ -502,34 +688,33 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
}
return pfound->val;
}
+
/* Can't find it as a long option. If this is not getopt_long_only,
or the option starts with '--' or is not a valid short
option, then it's an error.
- Otherwise interpret it as a short option. */
+ Otherwise interpret it as a short option. */
if (!long_only || argv[optind][1] == '-'
-#ifdef GETOPT_COMPAT
- || argv[optind][0] == '+'
-#endif /* GETOPT_COMPAT */
|| my_index (optstring, *nextchar) == NULL)
{
if (opterr)
{
if (argv[optind][1] == '-')
/* --option */
- fprintf (stderr, "%s: unrecognized option `--%s'\n",
+ fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
argv[0], nextchar);
else
/* +option or -option */
- fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
argv[0], argv[optind][0], nextchar);
}
- nextchar += strlen (nextchar);
+ nextchar = (char *) "";
optind++;
+ optopt = 0;
return '?';
}
}
- /* Look at and handle the next option-character. */
+ /* Look at and handle the next short option-character. */
{
char c = *nextchar++;
@@ -537,20 +722,147 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
/* Increment `optind' when we start to process its last character. */
if (*nextchar == '\0')
- optind++;
+ ++optind;
if (temp == NULL || c == ':')
{
if (opterr)
{
- if (c < 040 || c >= 0177)
- fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
+ if (posixly_correct)
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, _("%s: illegal option -- %c\n"),
argv[0], c);
else
- fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
+ fprintf (stderr, _("%s: invalid option -- %c\n"),
+ argv[0], c);
}
+ optopt = c;
return '?';
}
+ /* Convenience. Treat POSIX -W foo same as long option --foo */
+ if (temp[0] == 'W' && temp[1] == ';')
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = 0;
+ int option_index;
+
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ {
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+ }
+ optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ return c;
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+
+ /* optarg is now the argument, see if it's in the
+ table of longopts. */
+
+ for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, nextchar, nameend - nextchar))
+ {
+ if ((unsigned int) (nameend - nextchar) == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], argv[optind]);
+ nextchar += strlen (nextchar);
+ optind++;
+ return '?';
+ }
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = nameend + 1;
+ else
+ {
+ if (opterr)
+ fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ nextchar = NULL;
+ return 'W'; /* Let the application handle it. */
+ }
if (temp[1] == ':')
{
if (temp[2] == ':')
@@ -562,13 +874,13 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
optind++;
}
else
- optarg = 0;
+ optarg = NULL;
nextchar = NULL;
}
else
{
/* This is an option that requires an argument. */
- if (*nextchar != 0)
+ if (*nextchar != '\0')
{
optarg = nextchar;
/* If we end this ARGV-element by taking the rest as an arg,
@@ -578,9 +890,17 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
else if (optind == argc)
{
if (opterr)
- fprintf (stderr, "%s: option `-%c' requires an argument\n",
+ {
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr,
+ _("%s: option requires an argument -- %c\n"),
argv[0], c);
- c = '?';
+ }
+ optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
}
else
/* We already incremented `optind' once;
@@ -604,6 +924,8 @@ getopt (argc, argv, optstring)
(int *) 0,
0);
}
+
+#endif /* Not ELIDE_CODE. */
#ifdef TEST
@@ -623,7 +945,7 @@ main (argc, argv)
int this_option_optind = optind ? optind : 1;
c = getopt (argc, argv, "abc:d:0123456789");
- if (c == EOF)
+ if (c == -1)
break;
switch (c)
diff --git a/gnu/usr.bin/grep/getopt.h b/gnu/usr.bin/grep/getopt.h
index f64de31..2d8c8f9 100644
--- a/gnu/usr.bin/grep/getopt.h
+++ b/gnu/usr.bin/grep/getopt.h
@@ -1,23 +1,31 @@
/* Declarations for getopt.
- Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Copyright (C) 1989,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
- This program is distributed in the hope that it will be useful,
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
#ifndef _GETOPT_H
#define _GETOPT_H 1
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
@@ -32,7 +40,7 @@ extern char *optarg;
On entry to `getopt', zero means this is the first call; initialize.
- When `getopt' returns EOF, this is the index of the first of the
+ When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
@@ -45,6 +53,10 @@ extern int optind;
extern int opterr;
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
@@ -68,7 +80,7 @@ extern int opterr;
struct option
{
-#if __STDC__
+#if defined (__STDC__) && __STDC__
const char *name;
#else
char *name;
@@ -82,15 +94,19 @@ struct option
/* Names for the values of the `has_arg' field of `struct option'. */
-enum _argtype
-{
- no_argument,
- required_argument,
- optional_argument
-};
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
-#if __STDC__
+#if defined (__STDC__) && __STDC__
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* __GNU_LIBRARY__ */
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
const struct option *longopts, int *longind);
extern int getopt_long_only (int argc, char *const *argv,
@@ -108,6 +124,10 @@ extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
-#endif /* not __STDC__ */
+#endif /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
#endif /* _GETOPT_H */
diff --git a/gnu/usr.bin/grep/getopt1.c b/gnu/usr.bin/grep/getopt1.c
new file mode 100644
index 0000000..a967c30
--- /dev/null
+++ b/gnu/usr.bin/grep/getopt1.c
@@ -0,0 +1,189 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "getopt.h"
+
+#if !defined (__STDC__) || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+/* Parse long ("--name") and short options; thin wrapper over _getopt_internal. */
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+ int argc;
+ char *const *argv;
+ const char *options;
+ const struct option *long_options;
+ int *opt_index;
+{
+ return _getopt_internal (argc, argv, options, long_options, opt_index, 0); /* long_only = 0 */
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+ If an option that starts with '-' (not '--') doesn't match a long option,
+ but does match a short option, it is parsed as a short option instead.
+ Implemented by calling _getopt_internal with its long_only argument set. */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+ int argc;
+ char *const *argv;
+ const char *options;
+ const struct option *long_options;
+ int *opt_index;
+{
+ return _getopt_internal (argc, argv, options, long_options, opt_index, 1); /* long_only = 1 */
+}
+
+
+#endif /* Not ELIDE_CODE. */
+
+#ifdef TEST
+
+#include <stdio.h>
+/* Test driver (built only with TEST defined): print each option getopt_long finds. */
+int
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ int c;
+ int digit_optind = 0;
+ /* NOTE(review): _() has no visible definition in this file -- confirm. */
+ while (1)
+ {
+ int this_option_optind = optind ? optind : 1; /* ARGV index about to be scanned */
+ int option_index = 0;
+ static struct option long_options[] =
+ {
+ {"add", 1, 0, 0},
+ {"append", 0, 0, 0},
+ {"delete", 1, 0, 0},
+ {"verbose", 0, 0, 0},
+ {"create", 0, 0, 0},
+ {"file", 1, 0, 0},
+ {0, 0, 0, 0}
+ };
+
+ c = getopt_long (argc, argv, "abc:d:0123456789",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case 0: /* every table entry has flag 0 and val 0, so long options return 0 */
+ printf (_("option %s"), long_options[option_index].name);
+ if (optarg)
+ printf (_(" with arg %s"), optarg);
+ printf ("\n");
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf (_("digits occur in two different argv-elements.\n"));
+ digit_optind = this_option_optind;
+ printf (_("option %c\n"), c);
+ break;
+
+ case 'a':
+ printf (_("option a\n"));
+ break;
+
+ case 'b':
+ printf (_("option b\n"));
+ break;
+
+ case 'c':
+ printf (_("option c with value `%s'\n"), optarg);
+ break;
+
+ case 'd':
+ printf (_("option d with value `%s'\n"), optarg);
+ break;
+
+ case '?': /* nothing to print here; the case is deliberately empty */
+ break;
+
+ default:
+ printf (_("?? getopt returned character code 0%o ??\n"), c);
+ }
+ }
+
+ if (optind < argc) /* list the ARGV elements from optind onward */
+ {
+ printf (_("non-option ARGV-elements: "));
+ while (optind < argc)
+ printf ("%s ", argv[optind++]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
diff --git a/gnu/usr.bin/grep/getpagesize.h b/gnu/usr.bin/grep/getpagesize.h
index e6bd561..a064973 100644
--- a/gnu/usr.bin/grep/getpagesize.h
+++ b/gnu/usr.bin/grep/getpagesize.h
@@ -1,42 +1,41 @@
-#ifdef BSD
-#ifndef BSD4_1
-#define HAVE_GETPAGESIZE
-#endif
-#endif
+/* Emulate getpagesize on systems that lack it. */
#ifndef HAVE_GETPAGESIZE
-#ifdef VMS
-#define getpagesize() 512
-#endif
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#ifdef _SC_PAGESIZE
-#define getpagesize() sysconf(_SC_PAGESIZE)
-#else
-
-#ifdef HAVE_SYS_PARAM_H
-#include <sys/param.h>
-
-#ifdef EXEC_PAGESIZE
-#define getpagesize() EXEC_PAGESIZE
-#else
-#ifdef NBPG
-#define getpagesize() NBPG * CLSIZE
-#ifndef CLSIZE
-#define CLSIZE 1
-#endif /* no CLSIZE */
-#else /* no NBPG */
-#define getpagesize() NBPC
-#endif /* no NBPG */
-#endif /* no EXEC_PAGESIZE */
-#else /* !HAVE_SYS_PARAM_H */
-#define getpagesize() 8192 /* punt totally */
-#endif /* !HAVE_SYS_PARAM_H */
-#endif /* no _SC_PAGESIZE */
-
-#endif /* not HAVE_GETPAGESIZE */
-
+# ifdef VMS
+# define getpagesize() 512
+# endif
+
+# ifdef HAVE_UNISTD_H
+# include <unistd.h>
+# endif
+
+# ifdef _SC_PAGESIZE
+# define getpagesize() sysconf(_SC_PAGESIZE)
+# else /* no _SC_PAGESIZE */
+# ifdef HAVE_SYS_PARAM_H
+# include <sys/param.h>
+# ifdef EXEC_PAGESIZE
+# define getpagesize() EXEC_PAGESIZE
+# else /* no EXEC_PAGESIZE */
+# ifdef NBPG
+# define getpagesize() (NBPG * CLSIZE) /* parenthesized so it expands safely inside larger expressions */
+# ifndef CLSIZE
+# define CLSIZE 1
+# endif /* no CLSIZE */
+# else /* no NBPG */
+# ifdef NBPC
+# define getpagesize() NBPC
+# else /* no NBPC */
+# ifdef PAGESIZE
+# define getpagesize() PAGESIZE
+# endif /* PAGESIZE */
+# endif /* no NBPC */
+# endif /* no NBPG */
+# endif /* no EXEC_PAGESIZE */
+# else /* no HAVE_SYS_PARAM_H */
+# define getpagesize() 8192 /* punt totally */
+# endif /* no HAVE_SYS_PARAM_H */
+# endif /* no _SC_PAGESIZE */
+
+#endif /* no HAVE_GETPAGESIZE */
diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c
index 1c45c45..3ed4720 100644
--- a/gnu/usr.bin/grep/grep.c
+++ b/gnu/usr.bin/grep/grep.c
@@ -1,5 +1,5 @@
-/* grep - print lines matching an extended regular expression
- Copyright (C) 1988 Free Software Foundation, Inc.
+/* grep.c - main driver file for grep.
+ Copyright (C) 1992, 1997, 1998, 1999 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,569 +13,1069 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
-/* Written June, 1988 by Mike Haertel
- BMG speedups added July, 1988 by James A. Woods and Arthur David Olson */
-
-#include <stdio.h>
+/* Written July 1992 by Mike Haertel. */
-#if defined(USG) || defined(STDC_HEADERS)
-#include <string.h>
-#ifndef bcopy
-#define bcopy(s,d,n) memcpy((d),(s),(n))
+#ifdef HAVE_CONFIG_H
+# include <config.h>
#endif
-#ifndef index
-#define index strchr
+#include <sys/types.h>
+#include <sys/stat.h>
+#if defined(HAVE_MMAP)
+# include <sys/mman.h>
#endif
-#else
-#include <strings.h>
+#if defined(HAVE_SETRLIMIT)
+# include <sys/time.h>
+# include <sys/resource.h>
#endif
+#include <stdio.h>
+#include "system.h"
+#include "getopt.h"
+#include "getpagesize.h"
+#include "grep.h"
+#include "savedir.h"
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
+#undef MAX
+#define MAX(A,B) ((A) > (B) ? (A) : (B))
-#ifndef STDC_HEADERS
-extern char *getenv();
+struct stats
+{
+ struct stats *parent; /* link to another entry in the chain of stat buffers */
+ struct stat stat; /* filled in by fstat () in reset () */
+};
+
+/* base of chain of stat buffers, used to detect directory loops */
+static struct stats stats_base;
+
+/* if non-zero, display usage information and exit */
+static int show_help;
+
+/* If non-zero, print the version on standard output and exit. */
+static int show_version;
+
+/* Long option names, each mapped to the value getopt_long returns for it. */
+static struct option long_options[] =
+{
+ {"after-context", required_argument, NULL, 'A'},
+ {"basic-regexp", no_argument, NULL, 'G'},
+ {"before-context", required_argument, NULL, 'B'},
+ {"byte-offset", no_argument, NULL, 'b'},
+ {"context", optional_argument, NULL, 'C'},
+ {"count", no_argument, NULL, 'c'},
+ {"directories", required_argument, NULL, 'd'},
+ {"extended-regexp", no_argument, NULL, 'E'},
+ {"file", required_argument, NULL, 'f'},
+ {"files-with-matches", no_argument, NULL, 'l'},
+ {"files-without-match", no_argument, NULL, 'L'},
+ {"fixed-regexp", no_argument, NULL, 'F'},
+ {"fixed-strings", no_argument, NULL, 'F'},
+ {"help", no_argument, &show_help, 1}, /* non-null flag: getopt stores 1 in show_help and returns 0 */
+ {"ignore-case", no_argument, NULL, 'i'},
+ {"line-number", no_argument, NULL, 'n'},
+ {"line-regexp", no_argument, NULL, 'x'},
+ {"no-filename", no_argument, NULL, 'h'},
+ {"no-messages", no_argument, NULL, 's'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"recursive", no_argument, NULL, 'r'},
+ {"regexp", required_argument, NULL, 'e'},
+ {"revert-match", no_argument, NULL, 'v'},
+ {"silent", no_argument, NULL, 'q'},
+ {"text", no_argument, NULL, 'a'},
+#if O_BINARY
+ {"binary", no_argument, NULL, 'U'},
+ {"unix-byte-offsets", no_argument, NULL, 'u'},
+#endif
+ {"version", no_argument, NULL, 'V'},
+ {"with-filename", no_argument, NULL, 'H'},
+ {"word-regexp", no_argument, NULL, 'w'},
+ {0, 0, 0, 0} /* terminator: the scan stops at a null name */
+};
+
+/* Define flags declared in grep.h. */
+char const *matcher;
+int match_icase;
+int match_words;
+int match_lines;
+
+/* For error messages. */
+static char *prog;
+static char const *filename;
+static int errseen;
+
+/* How to handle directories. */
+static enum
+ {
+ READ_DIRECTORIES,
+ RECURSE_DIRECTORIES,
+ SKIP_DIRECTORIES
+ } directories;
+
+static int ck_atoi PARAMS ((char const *, int *));
+static void usage PARAMS ((int)) __attribute__((noreturn));
+static void error PARAMS ((const char *, int));
+static int setmatcher PARAMS ((char const *));
+static char *page_alloc PARAMS ((size_t, char **));
+static int reset PARAMS ((int, char const *, struct stats *));
+static int fillbuf PARAMS ((size_t, struct stats *));
+static int grepbuf PARAMS ((char *, char *));
+static void prtext PARAMS ((char *, char *, int *));
+static void prpending PARAMS ((char *));
+static void prline PARAMS ((char *, char *, int));
+static void print_offset_sep PARAMS ((off_t, int));
+static void nlscan PARAMS ((char *));
+static int grep PARAMS ((int, char const *, struct stats *));
+static int grepdir PARAMS ((char const *, struct stats *));
+static int grepfile PARAMS ((char const *, struct stats *));
+#if O_BINARY
+static inline int undossify_input PARAMS ((register char *, size_t));
#endif
-extern int errno;
-extern char *sys_errlist[];
+/* Functions we'll use to search. */
+static void (*compile) PARAMS ((char *, size_t));
+static char *(*execute) PARAMS ((char *, size_t, char **));
-#include "dfa.h"
-#include "regex.h"
-#include "getopt.h"
+/* Print MESG on stderr, prefixed by the program name and, when ERRNUM is
+ nonzero, followed by strerror (ERRNUM). Always set errseen to 1. */
+static void
+error (mesg, errnum)
+ const char *mesg;
+ int errnum;
+{
+ if (errnum)
+ fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum));
+ else
+ fprintf (stderr, "%s: %s\n", prog, mesg);
+ errseen = 1; /* record that an error was reported */
+}
+
+/* Like error (), but then terminate the program with exit status 2. */
+void
+fatal (mesg, errnum)
+ const char *mesg;
+ int errnum;
+{
+ error (mesg, errnum);
+ exit (2);
+}
-/* Used by -w */
-#define WCHAR(C) (ISALNUM(C) || (C) == '_')
+/* Interface to handle errors and fix library lossage. */
+char *
+xmalloc (size)
+ size_t size;
+{
+ char *result;
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
+ result = malloc (size);
+ if (size && !result)
+ fatal (_("memory exhausted"), 0);
+ return result;
+}
-/* Exit status codes. */
-#define MATCHES_FOUND 0 /* Exit 0 if no errors and matches found. */
-#define NO_MATCHES_FOUND 1 /* Exit 1 if no matches were found. */
-#define ERROR 2 /* Exit 2 if some error occurred. */
+/* Interface to handle errors and fix some library lossage. */
+char *
+xrealloc (ptr, size)
+ char *ptr;
+ size_t size;
+{
+ char *result;
-/* Error is set true if something awful happened. */
-static int error;
+ if (ptr)
+ result = realloc (ptr, size);
+ else
+ result = malloc (size);
+ if (size && !result)
+ fatal (_("memory exhausted"), 0);
+ return result;
+}
-/* The program name for error messages. */
-static char *prog;
+/* Convert STR, a string of decimal digits, to an integer stored in *OUT.
+ Return 0 on success, or -1 if STR contains any non-digit character. */
+static int
+ck_atoi (str, out)
+ char const *str;
+ int *out;
+{
+ char const *p;
+ for (p = str; *p; p++)
+ if (*p < '0' || *p > '9')
+ return -1;
+ /* Convert the string that was just validated, not the global optarg. */
+ *out = atoi (str);
+ return 0;
+}
-/* We do all our own buffering by hand for efficiency. */
-static char *buffer; /* The buffer itself, grown as needed. */
-static bufbytes; /* Number of bytes in the buffer. */
-static size_t bufalloc; /* Number of bytes allocated to the buffer. */
-static bufprev; /* Number of bytes that have been forgotten.
- This is used to get byte offsets from the
- beginning of the file. */
-static bufread; /* Number of bytes to get with each read(). */
-static void
-initialize_buffer()
+/* Hairy buffering mechanism for grep. The intent is to keep
+ all reads aligned on a page boundary and multiples of the
+ page size. */
+
+static char *ubuffer; /* Unaligned base of buffer. */
+static char *buffer; /* Base of buffer. */
+static size_t bufsalloc; /* Allocated size of buffer save region. */
+static size_t bufalloc; /* Total buffer size. */
+static int bufdesc; /* File descriptor. */
+static char *bufbeg; /* Beginning of user-visible stuff. */
+static char *buflim; /* Limit of user-visible stuff. */
+static size_t pagesize; /* alignment of memory pages */
+
+#if defined(HAVE_MMAP)
+static int bufmapped; /* True for ordinary files. */
+static off_t bufoffset; /* What read() normally remembers. */
+static off_t initial_bufoffset; /* Initial value of bufoffset. */
+#endif
+
+/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
+ an integer or a pointer. Both args must be free of side effects. */
+#define ALIGN_TO(val, alignment) \
+ ((size_t) (val) % (alignment) == 0 \
+ ? (val) \
+ : (val) + ((alignment) - (size_t) (val) % (alignment)))
+
+/* Return the address of a new page-aligned buffer of size SIZE. Set
+ *UP to the newly allocated (but possibly unaligned) buffer used to
+ *build the aligned buffer. To free the buffer, free (*UP). */
+static char *
+page_alloc (size, up)
+ size_t size;
+ char **up;
{
- bufread = 8192;
- bufalloc = bufread + bufread / 2;
- buffer = malloc(bufalloc);
- if (! buffer)
+ /* HAVE_WORKING_VALLOC means that valloc is properly declared, and
+ you can free the result of valloc. This symbol is not (yet)
+ autoconfigured. It can be useful to define HAVE_WORKING_VALLOC
+ while debugging, since some debugging memory allocators might
+ catch more bugs if this symbol is enabled. */
+#if HAVE_WORKING_VALLOC
+ *up = valloc (size);
+ return *up;
+#else
+ size_t asize = size + pagesize - 1;
+ if (size <= asize)
{
- fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
- sys_errlist[errno]);
- exit(ERROR);
+ *up = malloc (asize);
+ if (*up)
+ return ALIGN_TO (*up, pagesize);
}
+ return NULL;
+#endif
}
-/* The current input file. */
-static fd;
-static char *filename;
-static eof;
+/* Prepare the buffering state for reading descriptor FD. Return 0 if the
+ file should be skipped (fstat failed, or it is a directory and directories
+ is SKIP_DIRECTORIES), else 1. Allocate the buffer on the first call. */
+static int
+reset (fd, file, stats)
+ int fd;
+ char const *file;
+ struct stats *stats;
+{
+ if (pagesize == 0) /* first call: pagesize has not been set yet */
+ {
+ size_t ubufsalloc;
+ pagesize = getpagesize ();
+ if (pagesize == 0)
+ abort ();
+#ifndef BUFSALLOC
+ ubufsalloc = MAX (8192, pagesize);
+#else
+ ubufsalloc = BUFSALLOC;
+#endif
+ bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
+ bufalloc = 5 * bufsalloc;
+ /* The 1 byte of overflow is a kludge for dfaexec(), which
+ inserts a sentinel newline at the end of the buffer
+ being searched. There's gotta be a better way... */
+ if (bufsalloc < ubufsalloc /* the three size tests catch size_t wraparound */
+ || bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
+ || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
+ fatal (_("memory exhausted"), 0);
+ bufbeg = buffer;
+ buflim = buffer;
+ }
+ bufdesc = fd;
+ /* With mmap, always stat; otherwise only when directories is not READ_DIRECTORIES. */
+ if (
+#if defined(HAVE_MMAP)
+ 1
+#else
+ directories != READ_DIRECTORIES
+#endif
+ )
+ if (fstat (fd, &stats->stat) != 0)
+ {
+ error ("fstat", errno);
+ return 0;
+ }
+ if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
+ return 0;
+#if defined(HAVE_MMAP)
+ if (!S_ISREG (stats->stat.st_mode))
+ bufmapped = 0;
+ else
+ {
+ bufmapped = 1;
+ bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1); /* null FILE: use FD's current offset (whence 1 is traditionally SEEK_CUR) */
+ }
+#endif
+ return 1;
+}
-/* Fill the buffer retaining the last n bytes at the beginning of the
- newly filled buffer (for backward context). Returns the number of new
- bytes read from disk. */
+/* Read new stuff into the buffer, saving the specified
+ amount of old stuff. When we're done, 'bufbeg' points
+ to the beginning of the buffer contents, and 'buflim'
+ points just after the end. Return count of new stuff. */
static int
-fill_buffer_retaining(n)
- int n;
+fillbuf (save, stats)
+ size_t save;
+ struct stats *stats;
{
- char *p, *q;
- int i;
+ int cc;
+#if defined(HAVE_MMAP)
+ caddr_t maddr;
+#endif
+
+ if (save > bufsalloc)
+ {
+ char *nubuffer;
+ char *nbuffer;
+
+ while (save > bufsalloc)
+ bufsalloc *= 2;
+ bufalloc = 5 * bufsalloc;
+ if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
+ || ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer)))
+ fatal (_("memory exhausted"), 0);
+
+ bufbeg = nbuffer + bufsalloc - save;
+ memcpy (bufbeg, buflim - save, save);
+ free (ubuffer);
+ ubuffer = nubuffer;
+ buffer = nbuffer;
+ }
+ else
+ {
+ bufbeg = buffer + bufsalloc - save;
+ memcpy (bufbeg, buflim - save, save);
+ }
- /* See if we need to grow the buffer. */
- if (bufalloc - n <= bufread)
+#if defined(HAVE_MMAP)
+ if (bufmapped && bufoffset % pagesize == 0
+ && stats->stat.st_size - bufoffset >= bufalloc - bufsalloc)
{
- while (bufalloc - n <= bufread)
+ maddr = buffer + bufsalloc;
+ maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset);
+ if (maddr == (caddr_t) -1)
{
- bufalloc *= 2;
- bufread *= 2;
+ /* This used to issue a warning, but on some hosts
+ (e.g. Solaris 2.5) mmap can fail merely because some
+ other process has an advisory read lock on the file.
+ There's no point alarming the user about this misfeature. */
+#if 0
+ fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename,
+ strerror (errno));
+#endif
+ goto tryread;
}
- buffer = realloc(buffer, bufalloc);
- if (! buffer)
+#if 0
+ /* You might think this (or MADV_WILLNEED) would help,
+ but it doesn't, at least not on a Sun running 4.1.
+ In fact, it actually slows us down about 30%! */
+ madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL);
+#endif
+ cc = bufalloc - bufsalloc;
+ bufoffset += cc;
+ }
+ else
+ {
+ tryread:
+ /* We come here when we're not going to use mmap() any more.
+ Note that we need to synchronize the file offset the
+ first time through. */
+ if (bufmapped)
{
- fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
- sys_errlist[errno]);
- exit(ERROR);
+ bufmapped = 0;
+ if (bufoffset != initial_bufoffset)
+ lseek (bufdesc, bufoffset, 0);
}
+ cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
}
+#else
+ cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
+#endif /*HAVE_MMAP*/
+#if O_BINARY
+ if (cc > 0)
+ cc = undossify_input (buffer + bufsalloc, cc);
+#endif
+ if (cc > 0)
+ buflim = buffer + bufsalloc + cc;
+ else
+ buflim = buffer + bufsalloc;
+ return cc;
+}
- bufprev += bufbytes - n;
+/* Flags controlling the style of output. */
+static int always_text; /* Assume the input is always text. */
+static int out_quiet; /* Suppress all normal output. */
+static int out_invert; /* Print nonmatching stuff. */
+static int out_file; /* Print filenames. */
+static int out_line; /* Print line numbers. */
+static int out_byte; /* Print byte offsets. */
+static int out_before; /* Lines of leading context. */
+static int out_after; /* Lines of trailing context. */
+static int count_matches; /* Count matching lines. */
+static int list_files; /* List matching files. */
+static int no_filenames; /* Suppress file names. */
+static int suppress_errors; /* Suppress diagnostics. */
+
+/* Internal variables to keep track of byte count, context, etc. */
+static off_t totalcc; /* Total character count before bufbeg. */
+static char *lastnl; /* Pointer after last newline counted. */
+static char *lastout; /* Pointer after last character output;
+ NULL if no character has been output
+ or if it's conceptually before bufbeg. */
+static off_t totalnl; /* Total newline count before lastnl. */
+static int pending; /* Pending lines of output. */
+static int done_on_match; /* Stop scanning file on first match */
+
+#if O_BINARY
+# include "dosbuf.c"
+#endif
- /* Shift stuff down. */
- for (i = n, p = buffer, q = p + bufbytes - n; i--; )
- *p++ = *q++;
- bufbytes = n;
+static void
+nlscan (lim)
+ char *lim;
+{
+ char *beg;
- if (eof)
- return 0;
+ for (beg = lastnl; beg < lim; ++beg)
+ if (*beg == '\n')
+ ++totalnl;
+ lastnl = beg;
+}
+
+static void
+print_offset_sep (pos, sep)
+ off_t pos;
+ int sep;
+{
+ /* Do not rely on printf to print pos, since off_t may be longer than long,
+ and long long is not portable. */
+
+ char buf[sizeof pos * CHAR_BIT];
+ char *p = buf + sizeof buf - 1;
+ *p = sep;
- /* Read in new stuff. */
- i = read(fd, buffer + bufbytes, bufread);
- if (i < 0)
+ do
+ *--p = '0' + pos % 10;
+ while ((pos /= 10) != 0);
+
+ fwrite (p, 1, buf + sizeof buf - p, stdout);
+}
+
+static void
+prline (beg, lim, sep)
+ char *beg;
+ char *lim;
+ int sep;
+{
+ if (out_file)
+ printf ("%s%c", filename, sep);
+ if (out_line)
{
- fprintf(stderr, "%s: read on %s failed (%s)\n", prog,
- filename ? filename : "<stdin>", sys_errlist[errno]);
- error = 1;
+ nlscan (beg);
+ print_offset_sep (++totalnl, sep);
+ lastnl = lim;
}
-
- /* Kludge to pretend every nonempty file ends with a newline. */
- if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
+ if (out_byte)
{
- eof = i = 1;
- buffer[bufbytes] = '\n';
+ off_t pos = totalcc + (beg - bufbeg);
+#if O_BINARY
+ pos = dossified_pos (pos);
+#endif
+ print_offset_sep (pos, sep);
}
+ fwrite (beg, 1, lim - beg, stdout);
+ if (ferror (stdout))
+ error (_("writing output"), errno);
+ lastout = lim;
+}
- bufbytes += i;
- return i;
+/* Print pending lines of trailing context prior to LIM. */
+static void
+prpending (lim)
+ char *lim;
+{
+ char *nl;
+
+ if (!lastout)
+ lastout = bufbeg;
+ while (pending > 0 && lastout < lim)
+ {
+ --pending;
+ if ((nl = memchr (lastout, '\n', lim - lastout)) != 0)
+ ++nl;
+ else
+ nl = lim;
+ prline (lastout, nl, '-');
+ }
}
-
-/* Various flags set according to the argument switches. */
-static trailing_context; /* Lines of context to show after matches. */
-static leading_context; /* Lines of context to show before matches. */
-static byte_count; /* Precede output lines the byte count of the
- first character on the line. */
-static no_filenames; /* Do not display filenames. */
-static line_numbers; /* Precede output lines with line numbers. */
-static silent; /* Produce no output at all. This switch
- is bogus, ever hear of /dev/null? */
-static int whole_word; /* Match only whole words. Note that if
- backreferences are used this depends on
- the regex routines getting leftmost-longest
- right, which they don't right now if |
- is also used. */
-static int whole_line; /* Match only whole lines. Backreference
- caveat applies here too. */
-static nonmatching_lines; /* Print lines that don't match the regexp. */
-
-static bmgexec; /* Invoke Boyer-Moore-Gosper routines */
-
-/* The compiled regular expression lives here. */
-static struct regexp reg;
-
-/* The compiled regular expression for the backtracking matcher lives here. */
-static struct re_pattern_buffer regex;
-
-/* Pointer in the buffer after the last character printed. */
-static char *printed_limit;
-
-/* True when printed_limit has been artifically advanced without printing
- anything. */
-static int printed_limit_fake;
-
-/* Print a line at the given line number, returning the number of
- characters actually printed. Matching is true if the line is to
- be considered a "matching line". This is only meaningful if
- surrounding context is turned on. */
-static int
-print_line(p, number, matching)
- char *p;
- int number;
- int matching;
+
+/* Print the lines between BEG and LIM. Deal with context crap.
+ If NLINESP is non-null, store a count of lines between BEG and LIM. */
+static void
+prtext (beg, lim, nlinesp)
+ char *beg;
+ char *lim;
+ int *nlinesp;
{
- int count = 0;
+ static int used; /* avoid printing "--" before any output */
+ char *bp, *p, *nl;
+ int i, n;
+
+ if (!out_quiet && pending > 0)
+ prpending (beg);
+
+ p = beg;
- if (silent)
+ if (!out_quiet)
{
- do
- ++count;
- while (*p++ != '\n');
- printed_limit_fake = 0;
- printed_limit = p;
- return count;
+ /* Deal with leading context crap. */
+
+ bp = lastout ? lastout : bufbeg;
+ for (i = 0; i < out_before; ++i)
+ if (p > bp)
+ do
+ --p;
+ while (p > bp && p[-1] != '\n');
+
+ /* We only print the "--" separator if our output is
+ discontiguous from the last output in the file. */
+ if ((out_before || out_after) && used && p != lastout)
+ puts ("--");
+
+ while (p < beg)
+ {
+ nl = memchr (p, '\n', beg - p);
+ prline (p, nl + 1, '-');
+ p = nl + 1;
+ }
}
- if (filename && !no_filenames)
- printf("%s%c", filename, matching ? ':' : '-');
- if (byte_count)
- printf("%d%c", p - buffer + bufprev, matching ? ':' : '-');
- if (line_numbers)
- printf("%d%c", number, matching ? ':' : '-');
- do
+ if (nlinesp)
{
- ++count;
- putchar(*p);
+ /* Caller wants a line count. */
+ for (n = 0; p < lim; ++n)
+ {
+ if ((nl = memchr (p, '\n', lim - p)) != 0)
+ ++nl;
+ else
+ nl = lim;
+ if (!out_quiet)
+ prline (p, nl, ':');
+ p = nl;
+ }
+ *nlinesp = n;
}
- while (*p++ != '\n');
- printed_limit_fake = 0;
- printed_limit = p;
- return count;
+ else
+ if (!out_quiet)
+ prline (beg, lim, ':');
+
+ pending = out_after;
+ used = 1;
}
-/* Print matching or nonmatching lines from the current file. Returns a
- count of matching or nonmatching lines. */
+/* Scan the specified portion of the buffer, matching lines (or
+ between matching lines if OUT_INVERT is true). Return a count of
+ lines printed. */
static int
-grep()
+grepbuf (beg, lim)
+ char *beg;
+ char *lim;
{
- int retain = 0; /* Number of bytes to retain on next call
- to fill_buffer_retaining(). */
- char *search_limit; /* Pointer to the character after the last
- newline in the buffer. */
- char saved_char; /* Character after the last newline. */
- char *resume; /* Pointer to where to resume search. */
- int resume_index = 0; /* Count of characters to ignore after
- refilling the buffer. */
- int line_count = 1; /* Line number. */
- int try_backref; /* Set to true if we need to verify the
- match with a backtracking matcher. */
- int initial_line_count; /* Line count at beginning of last search. */
- char *match; /* Pointer to the first character after the
- string matching the regexp. */
- int match_count = 0; /* Count of matching lines. */
- char *matching_line; /* Pointer to first character of the matching
- line, or of the first line of context to
- print if context is turned on. */
- char *real_matching_line; /* Pointer to the first character of the
- real matching line. */
- char *next_line; /* Pointer to first character of the line
- following the matching line. */
- char *last_match_limit; /* Pointer after last matched line. */
- int pending_lines = 0; /* Lines of context left over from last match
- that we have to print. */
- static first_match = 1; /* True when nothing has been printed. */
- int i;
- char *tmp;
- char *execute();
+ int nlines, n;
+ register char *p, *b;
+ char *endp;
- printed_limit_fake = 0;
-
- while (fill_buffer_retaining(retain) > 0)
+ nlines = 0;
+ p = beg;
+ while ((b = (*execute)(p, lim - p, &endp)) != 0)
{
- /* Find the last newline in the buffer. */
- search_limit = buffer + bufbytes;
- while (search_limit > buffer && search_limit[-1] != '\n')
- --search_limit;
- if (search_limit == buffer)
+ /* Avoid matching the empty line at the end of the buffer. */
+ if (b == lim && ((b > beg && b[-1] == '\n') || b == beg))
+ break;
+ if (!out_invert)
{
- retain = bufbytes;
- continue;
+ prtext (b, endp, (int *) 0);
+ nlines += 1;
+ if (done_on_match)
+ return nlines;
}
+ else if (p < b)
+ {
+ prtext (p, b, &n);
+ nlines += n;
+ }
+ p = endp;
+ }
+ if (out_invert && p < lim)
+ {
+ prtext (p, lim, &n);
+ nlines += n;
+ }
+ return nlines;
+}
+
+/* Search a given file. Normally, return a count of lines printed;
+ but if the file is a directory and we search it recursively, then
+ return -2 if there was a match, and -1 otherwise. */
+static int
+grep (fd, file, stats)
+ int fd;
+ char const *file;
+ struct stats *stats;
+{
+ int nlines, i;
+ int not_text;
+ size_t residue, save;
+ char *beg, *lim;
+
+ if (!reset (fd, file, stats))
+ return 0;
- /* Save the character after the last newline so regexecute can write
- its own sentinel newline. */
- saved_char = *search_limit;
+ if (file && directories == RECURSE_DIRECTORIES
+ && S_ISDIR (stats->stat.st_mode))
+ {
+ /* Close fd now, so that we don't open a lot of file descriptors
+ when we recurse deeply. */
+ if (close (fd) != 0)
+ error (file, errno);
+ return grepdir (file, stats) - 2;
+ }
- /* Search the buffer for a match. */
- printed_limit = buffer;
- resume = buffer + resume_index;
- last_match_limit = resume;
- initial_line_count = line_count;
+ totalcc = 0;
+ lastout = 0;
+ totalnl = 0;
+ pending = 0;
+ nlines = 0;
+ residue = 0;
+ save = 0;
- /* In retrospect, I have to say that the following code sucks.
- For an example of how to do this right, see the fgrep
- driver program that I wrote around a year later. I'm
- too lazy to retrofit that to egrep right now (the
- pattern matchers have different needs). */
+ if (fillbuf (save, stats) < 0)
+ {
+ if (! (is_EISDIR (errno, file) && suppress_errors))
+ error (filename, errno);
+ return nlines;
+ }
+ not_text = (! (always_text | out_quiet)
+ && memchr (bufbeg, '\0', buflim - bufbeg));
+ done_on_match += not_text;
+ out_quiet += not_text;
- while (match = execute(&reg, resume, search_limit, 0, &line_count, &try_backref))
+ for (;;)
+ {
+ lastnl = bufbeg;
+ if (lastout)
+ lastout = bufbeg;
+ if (buflim - bufbeg == save)
+ break;
+ beg = bufbeg + save - residue;
+ for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim)
+ ;
+ residue = buflim - lim;
+ if (beg < lim)
{
- /* Find the beginning of the matching line. */
- matching_line = match;
- while (matching_line > resume && matching_line[-1] != '\n')
- --matching_line;
- real_matching_line = matching_line;
-
- /* Find the beginning of the next line. */
- next_line = match;
- while (next_line < search_limit && *next_line++ != '\n')
- ;
-
- /* If a potential backreference is indicated, try it out with
- a backtracking matcher to make sure the line is a match.
- This is hairy because we need to handle whole_line and
- whole_word matches specially. The method was stolen from
- GNU fgrep. */
- if (try_backref)
- {
- struct re_registers regs;
- int beg, len, maxlen, ret;
+ nlines += grepbuf (beg, lim);
+ if (pending)
+ prpending (lim);
+ if (nlines && done_on_match && !out_invert)
+ goto finish_grep;
+ }
+ i = 0;
+ beg = lim;
+ while (i < out_before && beg > bufbeg && beg != lastout)
+ {
+ ++i;
+ do
+ --beg;
+ while (beg > bufbeg && beg[-1] != '\n');
+ }
+ if (beg != lastout)
+ lastout = 0;
+ save = residue + lim - beg;
+ totalcc += buflim - bufbeg - save;
+ if (out_line)
+ nlscan (beg);
+ if (fillbuf (save, stats) < 0)
+ {
+ if (! (is_EISDIR (errno, file) && suppress_errors))
+ error (filename, errno);
+ goto finish_grep;
+ }
+ }
+ if (residue)
+ {
+ nlines += grepbuf (bufbeg + save - residue, buflim);
+ if (pending)
+ prpending (buflim);
+ }
- beg = 0;
- for (maxlen = next_line - matching_line - 1; beg <= maxlen; ++beg)
- {
- /* See if the matching line matches when backreferences
- are considered... */
- ret = re_search (&regex, matching_line, maxlen,
- beg, maxlen - beg, &regs);
- if (ret == -1)
- goto fail;
- beg = ret;
- len = regs.end[0] - beg;
- /* Ok, now check if it subsumed the whole line if -x */
- if (whole_line && (beg != 0 || len != maxlen))
- goto fail;
- /* If -w then check if the match aligns with word
- boundaries. We have to do this iteratively, because
- (a) The line may contain more than one occurence
- of the pattern, and;
- (b) Several alternatives in the pattern might
- be valid at a given point, and we may need to
- consider a shorter one in order to align with
- word boundaries. */
- else if (whole_word)
- while (len > 0)
- {
- /* If it's preceeded by a word constituent, then no go. */
- if (beg > 0
- && WCHAR((unsigned char) matching_line[beg - 1]))
- break;
- /* If it's followed by a word constituent, look for
- a shorter match. */
- else if (beg + len < maxlen
- && WCHAR((unsigned char) matching_line[beg + len]))
- /* This is sheer incest. */
- len = re_match_2 (&regex, (unsigned char *) 0, 0,
- matching_line, maxlen,
- beg, &regs, beg + len - 1);
- else
- goto succeed;
- }
- else
- goto succeed;
- }
- fail:
- resume = next_line;
- if (resume == search_limit)
- break;
- else
- continue;
- }
+ finish_grep:
+ done_on_match -= not_text;
+ out_quiet -= not_text;
+ if ((not_text & ~out_quiet) && nlines != 0)
+ printf (_("Binary file %s matches\n"), filename);
+ return nlines;
+}
- succeed:
- /* Print out the matching or nonmatching lines as necessary. */
- if (! nonmatching_lines)
- {
- /* Not -v, so nothing hairy... */
- ++match_count;
+static int
+grepfile (file, stats)
+ char const *file;
+ struct stats *stats;
+{
+ int desc;
+ int count;
+ int status;
- /* Print leftover trailing context from last time around. */
- while (pending_lines && last_match_limit < matching_line)
- {
- last_match_limit += print_line(last_match_limit,
- initial_line_count++,
- 0);
- --pending_lines;
- }
+ if (! file)
+ {
+ desc = 0;
+ filename = _("(standard input)");
+ }
+ else
+ {
+ desc = open (file, O_RDONLY);
- /* Back up over leading context if necessary. */
- for (i = leading_context;
- i > 0 && matching_line > printed_limit;
- --i)
+ if (desc < 0)
+ {
+ int e = errno;
+
+ if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES)
+ {
+ if (stat (file, &stats->stat) != 0)
{
- while (matching_line > printed_limit
- && (--matching_line)[-1] != '\n')
- ;
- --line_count;
+ error (file, errno);
+ return 1;
}
- /* If context is enabled, we may have to print a separator. */
- if ((leading_context || trailing_context) && !silent
- && !first_match && (printed_limit_fake
- || matching_line > printed_limit))
- printf("----------\n");
- first_match = 0;
-
- /* Print the matching line and its leading context. */
- while (matching_line < real_matching_line)
- matching_line += print_line(matching_line, line_count++, 0);
- matching_line += print_line(matching_line, line_count++, 1);
-
- /* If there's trailing context, leave some lines pending until
- next time. */
- pending_lines = trailing_context;
+ return grepdir (file, stats);
}
- else if (matching_line == last_match_limit)
+
+ if (!suppress_errors)
{
- /* In the -v case, this is where we deal with leftover
- trailing context from last time... */
- if (pending_lines > 0)
- {
- --pending_lines;
- print_line(matching_line, line_count, 0);
- }
- ++line_count;
+ if (directories == SKIP_DIRECTORIES)
+ switch (e)
+ {
+#ifdef EISDIR
+ case EISDIR:
+ return 1;
+#endif
+ case EACCES:
+ /* When skipping directories, don't worry about
+ directories that can't be opened. */
+ if (stat (file, &stats->stat) == 0
+ && S_ISDIR (stats->stat.st_mode))
+ return 1;
+ break;
+ }
+
+ error (file, e);
}
- else if (matching_line > last_match_limit)
- {
- char *start = last_match_limit;
-
- /* Back up over leading context if necessary. */
- for (i = leading_context; start > printed_limit && i; --i)
- {
- while (start > printed_limit && (--start)[-1] != '\n')
- ;
- --initial_line_count;
- }
-
- /* If context is enabled, we may have to print a separator. */
- if ((leading_context || trailing_context) && !silent
- && !first_match && (printed_limit_fake
- || start > printed_limit))
- printf("----------\n");
- first_match = 0;
- /* Print out the presumably matching leading context. */
- while (start < last_match_limit)
- start += print_line(start, initial_line_count++, 0);
-
- /* Print out the nonmatching lines prior to the matching line. */
- while (start < matching_line)
- {
- /* This counts as a "matching line" in -v. */
- ++match_count;
- start += print_line(start, initial_line_count++, 1);
- }
-
- /* Deal with trailing context. In -v what this means is
- we print the current (matching) line, marked as a non
- matching line. */
- if (trailing_context)
- {
- print_line(matching_line, line_count, 0);
- pending_lines = trailing_context - 1;
- }
+ return 1;
+ }
- /* Count the current line. */
- ++line_count;
- }
- else
- /* Let us pray this never happens... */
- abort();
+ filename = file;
+ }
- /* Resume searching at the beginning of the next line. */
- initial_line_count = line_count;
- resume = next_line;
- last_match_limit = next_line;
+#if O_BINARY
+ /* Set input to binary mode. Pipes are simulated with files
+ on DOS, so this includes the case of "foo | grep bar". */
+ if (!isatty (desc))
+ SET_BINARY (desc);
+#endif
- if (resume == search_limit)
- break;
+ count = grep (desc, file, stats);
+ if (count < 0)
+ status = count + 2;
+ else
+ {
+ if (count_matches)
+ {
+ if (out_file)
+ printf ("%s:", filename);
+ printf ("%d\n", count);
}
-
- /* Restore the saved character. */
- *search_limit = saved_char;
- if (! nonmatching_lines)
+ if (count)
{
- while (last_match_limit < search_limit && pending_lines)
- {
- last_match_limit += print_line(last_match_limit,
- initial_line_count++,
- 0);
- --pending_lines;
- }
+ status = 0;
+ if (list_files == 1)
+ printf ("%s\n", filename);
}
- else if (search_limit > last_match_limit)
+ else
{
- char *start = last_match_limit;
+ status = 1;
+ if (list_files == -1)
+ printf ("%s\n", filename);
+ }
- /* Back up over leading context if necessary. */
- for (i = leading_context; start > printed_limit && i; --i)
- {
- while (start > printed_limit && (--start)[-1] != '\n')
- ;
- --initial_line_count;
- }
+ if (file && close (desc) != 0)
+ error (file, errno);
+ }
- /* If context is enabled, we may have to print a separator. */
- if ((leading_context || trailing_context) && !silent
- && !first_match && (printed_limit_fake
- || start > printed_limit))
- printf("----------\n");
- first_match = 0;
-
- /* Print out all the nonmatching lines up to the search limit. */
- while (start < last_match_limit)
- start += print_line(start, initial_line_count++, 0);
- while (start < search_limit)
- {
- ++match_count;
- start += print_line(start, initial_line_count++, 1);
- }
+ return status;
+}
+
+static int
+grepdir (dir, stats)
+ char const *dir;
+ struct stats *stats;
+{
+ int status = 1;
+ struct stats *ancestor;
+ char *name_space;
- pending_lines = trailing_context;
- resume_index = 0;
- retain = bufbytes - (search_limit - buffer);
- continue;
+ for (ancestor = stats; (ancestor = ancestor->parent) != 0; )
+ if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino)
+ | (ancestor->stat.st_dev ^ stats->stat.st_dev)))
+ {
+ if (!suppress_errors)
+ fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
+ _("recursive directory loop"));
+ return 1;
+ }
+
+ name_space = savedir (dir, (unsigned) stats->stat.st_size);
+
+ if (! name_space)
+ {
+ if (errno)
+ {
+ if (!suppress_errors)
+ error (dir, errno);
}
-
- /* Save the trailing end of the buffer for possible use as leading
- context in the future. */
- i = leading_context;
- tmp = search_limit;
- while (tmp > printed_limit && i--)
- while (tmp > printed_limit && (--tmp)[-1] != '\n')
- ;
- resume_index = search_limit - tmp;
- retain = bufbytes - (tmp - buffer);
- if (tmp > printed_limit)
- printed_limit_fake = 1;
+ else
+ fatal (_("Memory exhausted"), 0);
+ }
+ else
+ {
+ size_t dirlen = strlen (dir);
+ int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir)
+ || IS_SLASH (dir[dirlen - 1]));
+ char *file = NULL;
+ char *namep = name_space;
+ struct stats child;
+ child.parent = stats;
+ out_file += !no_filenames;
+ while (*namep)
+ {
+ size_t namelen = strlen (namep);
+ file = xrealloc (file, dirlen + 1 + namelen + 1);
+ strcpy (file, dir);
+ file[dirlen] = '/';
+ strcpy (file + dirlen + needs_slash, namep);
+ namep += namelen + 1;
+ status &= grepfile (file, &child);
+ }
+ out_file -= !no_filenames;
+ if (file)
+ free (file);
+ free (name_space);
}
- return match_count;
+ return status;
}
-
-void
-usage_and_die()
+
+static void
+usage(status)
+int status;
{
- fprintf(stderr, "\
-Usage: %s [-CVbchilnsvwx] [-num] [-A num] [-B num] [-f file]\n\
- [-e] expr [file...]\n", prog);
- exit(ERROR);
+ if (status != 0)
+ {
+ fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog);
+ fprintf (stderr, _("Try `%s --help' for more information.\n"), prog);
+ }
+ else
+ {
+ printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
+ printf (_("\
+Search for PATTERN in each FILE or standard input.\n\
+\n\
+Regexp selection and interpretation:\n\
+ -E, --extended-regexp PATTERN is an extended regular expression\n\
+ -F, --fixed-regexp PATTERN is a fixed string separated by newlines\n\
+ -G, --basic-regexp PATTERN is a basic regular expression\n\
+ -e, --regexp=PATTERN use PATTERN as a regular expression\n\
+ -f, --file=FILE obtain PATTERN from FILE\n\
+ -i, --ignore-case ignore case distinctions\n\
+ -w, --word-regexp force PATTERN to match only whole words\n\
+ -x, --line-regexp force PATTERN to match only whole lines\n"));
+ printf (_("\
+\n\
+Miscellaneous:\n\
+ -s, --no-messages suppress error messages\n\
+ -v, --revert-match select non-matching lines\n\
+ -V, --version print version information and exit\n\
+ --help display this help and exit\n"));
+ printf (_("\
+\n\
+Output control:\n\
+ -b, --byte-offset print the byte offset with output lines\n\
+ -n, --line-number print line number with output lines\n\
+ -H, --with-filename print the filename for each match\n\
+ -h, --no-filename suppress the prefixing filename on output\n\
+ -q, --quiet, --silent suppress all normal output\n\
+ -a, --text do not suppress binary output\n\
+ -d, --directories=ACTION how to handle directories\n\
+ ACTION is 'read', 'recurse', or 'skip'.\n\
+ -r, --recursive equivalent to --directories=recurse.\n\
+ -L, --files-without-match only print FILE names containing no match\n\
+ -l, --files-with-matches only print FILE names containing matches\n\
+ -c, --count only print a count of matching lines per FILE\n"));
+ printf (_("\
+\n\
+Context control:\n\
+ -B, --before-context=NUM print NUM lines of leading context\n\
+ -A, --after-context=NUM print NUM lines of trailing context\n\
+ -C, --context[=NUM] print NUM (default 2) lines of output context\n\
+ unless overriden by -A or -B\n\
+ -NUM same as --context=NUM\n\
+ -U, --binary do not strip CR characters at EOL (MSDOS)\n\
+ -u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\
+\n\
+If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\
+With no FILE, or when FILE is -, read standard input. If less than\n\
+two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\
+Exit with 2 if syntax errors or system errors.\n"));
+ printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
+ }
+ exit (status);
}
-static char version[] = "GNU e?grep, version 1.6";
+/* Go through the matchers vector and look for the specified matcher.
+ If we find it, install it in compile and execute, and return 1. */
+static int
+setmatcher (name)
+ char const *name;
+{
+ int i;
+#ifdef HAVE_SETRLIMIT
+ struct rlimit rlim;
+#endif
+
+ for (i = 0; matchers[i].name; ++i)
+ if (strcmp (name, matchers[i].name) == 0)
+ {
+ compile = matchers[i].compile;
+ execute = matchers[i].execute;
+#if HAVE_SETRLIMIT && defined(RLIMIT_STACK)
+ /* I think every platform needs to do this, so that regex.c
+ doesn't overflow the stack. The default value of
+ `re_max_failures' is too large for some platforms: it needs
+ more than 3MB-large stack.
+
+ The test for HAVE_SETRLIMIT should go into `configure'. */
+ if (!getrlimit (RLIMIT_STACK, &rlim))
+ {
+ long newlim;
+ extern long int re_max_failures; /* from regex.c */
+
+ /* Approximate the amount regex.c needs, plus some more. */
+ newlim = re_max_failures * 2 * 20 * sizeof (char *);
+ if (newlim > rlim.rlim_max)
+ {
+ newlim = rlim.rlim_max;
+ re_max_failures = newlim / (2 * 20 * sizeof (char *));
+ }
+ if (rlim.rlim_cur < newlim)
+ rlim.rlim_cur = newlim;
+
+ setrlimit (RLIMIT_STACK, &rlim);
+ }
+#endif
+ return 1;
+ }
+ return 0;
+}
int
-main(argc, argv)
+main (argc, argv)
int argc;
- char **argv;
+ char *argv[];
{
- int c;
- int ignore_case = 0; /* Compile the regexp to ignore case. */
- char *the_regexp = 0; /* The regular expression. */
- int regexp_len; /* Length of the regular expression. */
- char *regexp_file = 0; /* File containing parallel regexps. */
- int count_lines = 0; /* Display only a count of matching lines. */
- int list_files = 0; /* Display only the names of matching files. */
- int line_count = 0; /* Count of matching lines for a file. */
- int matches_found = 0; /* True if matches were found. */
- char *regex_errmesg; /* Error message from regex routines. */
- char translate[_NOTCHAR]; /* Translate table for case conversion
- (needed by the backtracking matcher). */
-
- if (prog = index(argv[0], '/'))
- ++prog;
- else
- prog = argv[0];
+ char *keys;
+ size_t keycc, oldcc, keyalloc;
+ int with_filenames;
+ int opt, cc, status;
+ unsigned digit_args_val, default_context;
+ FILE *fp;
+ extern char *optarg;
+ extern int optind;
+
+ initialize_main (&argc, &argv);
+ prog = argv[0];
+ if (prog && strrchr (prog, '/'))
+ prog = strrchr (prog, '/') + 1;
+
+#if defined(__MSDOS__) || defined(_WIN32)
+ /* DOS and MS-Windows use backslashes as directory separators, and usually
+ have an .exe suffix. They also have case-insensitive filesystems. */
+ if (prog)
+ {
+ char *p = prog;
+ char *bslash = strrchr (argv[0], '\\');
+
+ if (bslash && bslash >= prog) /* for mixed forward/backslash case */
+ prog = bslash + 1;
+ else if (prog == argv[0]
+ && argv[0][0] && argv[0][1] == ':') /* "c:progname" */
+ prog = argv[0] + 2;
+
+ /* Collapse the letter-case, so `strcmp' could be used hence. */
+ for ( ; *p; p++)
+ if (*p >= 'A' && *p <= 'Z')
+ *p += 'a' - 'A';
+
+ /* Remove the .exe extension, if any. */
+ if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0)
+ *p = '\0';
+ }
+#endif
- opterr = 0;
- while ((c = getopt(argc, argv, "0123456789A:B:CVbce:f:hilnsvwx")) != EOF)
- switch (c)
- {
- case '?':
- usage_and_die();
- break;
+ keys = NULL;
+ keycc = 0;
+ with_filenames = 0;
+ matcher = NULL;
+
+ /* The value -1 means to use DEFAULT_CONTEXT. */
+ out_after = out_before = -1;
+ /* Default before/after context: changed by -C/-NUM options */
+ default_context = 0;
+ /* Accumulated value of individual digits in a -NUM option */
+ digit_args_val = 0;
+
+
+/* Internationalization. */
+#if HAVE_SETLOCALE
+ setlocale (LC_ALL, "");
+#endif
+#if ENABLE_NLS
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+#endif
+ while ((opt = getopt_long (argc, argv,
+#if O_BINARY
+ "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxyUu",
+#else
+ "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxy",
+#endif
+ long_options, NULL)) != EOF)
+ switch (opt)
+ {
case '0':
case '1':
case '2':
@@ -586,422 +1086,239 @@ main(argc, argv)
case '7':
case '8':
case '9':
- trailing_context = 10 * trailing_context + c - '0';
- leading_context = 10 * leading_context + c - '0';
+ digit_args_val = 10 * digit_args_val + opt - '0';
+ default_context = digit_args_val;
break;
-
case 'A':
- if (! sscanf(optarg, "%d", &trailing_context)
- || trailing_context < 0)
- usage_and_die();
+ if (optarg)
+ {
+ if (ck_atoi (optarg, &out_after))
+ fatal (_("invalid context length argument"), 0);
+ }
break;
-
case 'B':
- if (! sscanf(optarg, "%d", &leading_context)
- || leading_context < 0)
- usage_and_die();
+ if (optarg)
+ {
+ if (ck_atoi (optarg, &out_before))
+ fatal (_("invalid context length argument"), 0);
+ }
break;
-
case 'C':
- trailing_context = leading_context = 2;
+ /* Set output match context, but let any explicit leading or
+ trailing amount specified with -A or -B stand. */
+ if (optarg)
+ {
+ if (ck_atoi (optarg, &default_context))
+ fatal (_("invalid context length argument"), 0);
+ }
+ else
+ default_context = 2;
break;
-
+ case 'E':
+ if (matcher && strcmp (matcher, "posix-egrep") != 0)
+ fatal (_("you may specify only one of -E, -F, or -G"), 0);
+ matcher = "posix-egrep";
+ break;
+ case 'F':
+ if (matcher && strcmp(matcher, "fgrep") != 0)
+ fatal(_("you may specify only one of -E, -F, or -G"), 0);;
+ matcher = "fgrep";
+ break;
+ case 'G':
+ if (matcher && strcmp (matcher, "grep") != 0)
+ fatal (_("you may specify only one of -E, -F, or -G"), 0);
+ matcher = "grep";
+ break;
+ case 'H':
+ with_filenames = 1;
+ break;
+#if O_BINARY
+ case 'U':
+ dos_use_file_type = DOS_BINARY;
+ break;
+ case 'u':
+ dos_report_unix_offset = 1;
+ break;
+#endif
case 'V':
- fprintf(stderr, "%s\n", version);
+ show_version = 1;
+ break;
+ case 'X':
+ if (matcher)
+ fatal (_("matcher already specified"), 0);
+ matcher = optarg;
+ break;
+ case 'a':
+ always_text = 1;
break;
-
case 'b':
- byte_count = 1;
+ out_byte = 1;
break;
-
case 'c':
- count_lines = 1;
- silent = 1;
+ out_quiet = 1;
+ count_matches = 1;
+ break;
+ case 'd':
+ if (strcmp (optarg, "read") == 0)
+ directories = READ_DIRECTORIES;
+ else if (strcmp (optarg, "skip") == 0)
+ directories = SKIP_DIRECTORIES;
+ else if (strcmp (optarg, "recurse") == 0)
+ directories = RECURSE_DIRECTORIES;
+ else
+ fatal (_("unknown directories method"), 0);
break;
-
case 'e':
- /* It doesn't make sense to mix -f and -e. */
- if (regexp_file)
- usage_and_die();
- the_regexp = optarg;
+ cc = strlen (optarg);
+ keys = xrealloc (keys, keycc + cc + 1);
+ strcpy (&keys[keycc], optarg);
+ keycc += cc;
+ keys[keycc++] = '\n';
break;
-
case 'f':
- /* It doesn't make sense to mix -f and -e. */
- if (the_regexp)
- usage_and_die();
- regexp_file = optarg;
+ fp = strcmp (optarg, "-") != 0 ? fopen (optarg, "r") : stdin;
+ if (!fp)
+ fatal (optarg, errno);
+ for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
+ ;
+ keys = xrealloc (keys, keyalloc);
+ oldcc = keycc;
+ while (!feof (fp)
+ && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0)
+ {
+ keycc += cc;
+ if (keycc == keyalloc - 1)
+ keys = xrealloc (keys, keyalloc *= 2);
+ }
+ if (fp != stdin)
+ fclose(fp);
+ /* Append final newline if file ended in non-newline. */
+ if (oldcc != keycc && keys[keycc - 1] != '\n')
+ keys[keycc++] = '\n';
break;
-
case 'h':
no_filenames = 1;
break;
-
case 'i':
- ignore_case = 1;
- for (c = 0; c < _NOTCHAR; ++c)
- if (isupper(c))
- translate[c] = tolower(c);
- else
- translate[c] = c;
- regex.translate = translate;
+ case 'y': /* For old-timers . . . */
+ match_icase = 1;
+ break;
+ case 'L':
+ /* Like -l, except list files that don't contain matches.
+ Inspired by the same option in Hume's gre. */
+ out_quiet = 1;
+ list_files = -1;
+ done_on_match = 1;
break;
-
case 'l':
+ out_quiet = 1;
list_files = 1;
- silent = 1;
+ done_on_match = 1;
break;
-
case 'n':
- line_numbers = 1;
+ out_line = 1;
+ break;
+ case 'q':
+ done_on_match = 1;
+ out_quiet = 1;
+ break;
+ case 'r':
+ directories = RECURSE_DIRECTORIES;
break;
-
case 's':
- silent = 1;
+ suppress_errors = 1;
break;
-
case 'v':
- nonmatching_lines = 1;
+ out_invert = 1;
break;
-
case 'w':
- whole_word = 1;
+ match_words = 1;
break;
-
case 'x':
- whole_line = 1;
+ match_lines = 1;
+ break;
+ case 0:
+ /* long options */
break;
-
default:
- /* This can't happen. */
- fprintf(stderr, "%s: getopt(3) let one by!\n", prog);
- usage_and_die();
+ usage (2);
break;
}
- /* Set the syntax depending on whether we are EGREP or not. */
-#ifdef EGREP
- regsyntax(RE_SYNTAX_EGREP, ignore_case);
- re_set_syntax(RE_SYNTAX_EGREP);
-#else
- regsyntax(RE_SYNTAX_GREP, ignore_case);
- re_set_syntax(RE_SYNTAX_GREP);
-#endif
+ if (out_after < 0)
+ out_after = default_context;
+ if (out_before < 0)
+ out_before = default_context;
- /* Compile the regexp according to all the options. */
- if (regexp_file)
+ if (show_version)
{
- FILE *fp = fopen(regexp_file, "r");
- int len = 256;
- int i = 0;
+ printf (_("grep (GNU grep) %s\n"), VERSION);
+ printf ("\n");
+ printf (_("\
+Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
+ printf (_("\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
+ printf ("\n");
+ exit (0);
+ }
- if (! fp)
- {
- fprintf(stderr, "%s: %s: %s\n", prog, regexp_file,
- sys_errlist[errno]);
- exit(ERROR);
- }
+ if (show_help)
+ usage (0);
- the_regexp = malloc(len);
- while ((c = getc(fp)) != EOF)
- {
- the_regexp[i++] = c;
- if (i == len)
- the_regexp = realloc(the_regexp, len *= 2);
- }
- fclose(fp);
- /* Nuke the concluding newline so we won't match the empty string. */
- if (i > 0 && the_regexp[i - 1] == '\n')
- --i;
- regexp_len = i;
- }
- else if (! the_regexp)
+ if (keys)
{
- if (optind >= argc)
- usage_and_die();
- the_regexp = argv[optind++];
- regexp_len = strlen(the_regexp);
- }
- else
- regexp_len = strlen(the_regexp);
-
- if (whole_word || whole_line)
- {
- /* In the whole-word case, we use the pattern:
- (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
- In the whole-line case, we use the pattern:
- ^(userpattern)$.
- BUG: Using [A-Za-z_] is locale-dependent! */
-
- char *n = malloc(regexp_len + 50);
- int i = 0;
-
-#ifdef EGREP
- if (whole_word)
- strcpy(n, "(^|[^A-Za-z_])(");
- else
- strcpy(n, "^(");
-#else
- /* Todo: Make *sure* this is the right syntax. Down with grep! */
- if (whole_word)
- strcpy(n, "\\(^\\|[^A-Za-z_]\\)\\(");
- else
- strcpy(n, "^\\(");
-#endif
- i = strlen(n);
- bcopy(the_regexp, n + i, regexp_len);
- i += regexp_len;
-#ifdef EGREP
- if (whole_word)
- strcpy(n + i, ")([^A-Za-z_]|$)");
+ if (keycc == 0)
+ /* No keys were specified (e.g. -f /dev/null). Match nothing. */
+ out_invert ^= 1;
else
- strcpy(n + i, ")$");
-#else
- if (whole_word)
- strcpy(n + i, "\\)\\([^A-Za-z_]\\|$\\)");
- else
- strcpy(n + i, "\\)$");
-#endif
- i += strlen(n + i);
- regcompile(n, i, &reg, 1);
+ /* Strip trailing newline. */
+ --keycc;
}
else
- regcompile(the_regexp, regexp_len, &reg, 1);
-
-
- if (regex_errmesg = re_compile_pattern(the_regexp, regexp_len, &regex))
- regerror(regex_errmesg);
-
- /*
- Find the longest metacharacter-free string which must occur in the
- regexpr, before short-circuiting regexecute() with Boyer-Moore-Gosper.
- (Conjecture: The problem in general is NP-complete.) If there is no
- such string (like for many alternations), then default to full automaton
- search. regmust() code and heuristics [see dfa.c] courtesy
- Arthur David Olson.
- */
- if (line_numbers == 0 && nonmatching_lines == 0)
- {
- if (reg.mustn == 0 || reg.mustn == MUST_MAX ||
- index(reg.must, '\0') != reg.must + reg.mustn)
- bmgexec = 0;
- else
- {
- reg.must[reg.mustn] = '\0';
- if (getenv("MUSTDEBUG") != NULL)
- (void) printf("must have: \"%s\"\n", reg.must);
- bmg_setup(reg.must, ignore_case);
- bmgexec = 1;
- }
- }
-
- if (argc - optind < 2)
- no_filenames = 1;
-
- initialize_buffer();
-
- if (argc > optind)
- while (optind < argc)
+ if (optind < argc)
{
- bufprev = eof = 0;
- filename = argv[optind++];
- fd = open(filename, 0, 0);
- if (fd < 0)
- {
- fprintf(stderr, "%s: %s: %s\n", prog, filename,
- sys_errlist[errno]);
- error = 1;
- continue;
- }
- if (line_count = grep())
- matches_found = 1;
- close(fd);
- if (count_lines)
- if (!no_filenames)
- printf("%s:%d\n", filename, line_count);
- else
- printf("%d\n", line_count);
- else if (list_files && line_count)
- printf("%s\n", filename);
+ keys = argv[optind++];
+ keycc = strlen (keys);
}
- else
- {
- if (line_count = grep())
- matches_found = 1;
- if (count_lines)
- printf("%d\n", line_count);
- else if (list_files && line_count)
- printf("<stdin>\n");
- }
+ else
+ usage (2);
- if (error)
- exit(ERROR);
- if (matches_found)
- exit(MATCHES_FOUND);
- exit(NO_MATCHES_FOUND);
- return NO_MATCHES_FOUND;
-}
+ if (! matcher)
+ matcher = default_matcher;
-/* Needed by the regexp routines. This could be fancier, especially when
- dealing with parallel regexps in files. */
-void
-regerror(s)
- const char *s;
-{
- fprintf(stderr, "%s: %s\n", prog, s);
- exit(ERROR);
-}
+ if (!setmatcher (matcher) && !setmatcher ("default"))
+ abort ();
-/*
- bmg_setup() and bmg_search() adapted from:
- Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in
- original paper (CACM, October, 1977). No delta1 or delta2. According to
- experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal
- practical value. However, to improve for worst case input, integrating
- the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput.,
- February 1986) deserves consideration.
+ (*compile)(keys, keycc);
- James A. Woods Copyleft (C) 1986, 1988
- NASA Ames Research Center
-*/
+ if ((argc - optind > 1 && !no_filenames) || with_filenames)
+ out_file = 1;
-char *
-execute(r, begin, end, newline, count, try_backref)
- struct regexp *r;
- char *begin;
- char *end;
- int newline;
- int *count;
- int *try_backref;
-{
- register char *p, *s;
- char *match;
- char *start = begin;
- char save; /* regexecute() sentinel */
- int len;
- char *bmg_search();
-
- if (!bmgexec) /* full automaton search */
- return(regexecute(r, begin, end, newline, count, try_backref));
- else
- {
- len = end - begin;
- while ((match = bmg_search((unsigned char *) start, len)) != NULL)
- {
- p = match; /* narrow search range to submatch line */
- while (p > begin && *p != '\n')
- p--;
- s = match;
- while (s < end && *s != '\n')
- s++;
- s++;
-
- save = *s;
- *s = '\0';
- match = regexecute(r, p, s, newline, count, try_backref);
- *s = save;
-
- if (match != NULL)
- return((char *) match);
- else
- {
- start = s;
- len = end - start;
- }
- }
- return(NULL);
- }
-}
-
-int delta0[256];
-unsigned char cmap[256]; /* (un)folded characters */
-unsigned char pattern[5000];
-int patlen;
-
-char *
-bmg_search(buffer, buflen)
- unsigned char *buffer;
- int buflen;
-{
- register unsigned char *k, *strend, *s, *buflim;
- register int t;
- int j;
-
- if (patlen > buflen)
- return NULL;
-
- buflim = buffer + buflen;
- if (buflen > patlen * 4)
- strend = buflim - patlen * 4;
- else
- strend = buffer;
+#if O_BINARY
+ /* Output is set to binary mode because we shouldn't convert
+ NL to CR-LF pairs, especially when grepping binary files. */
+ if (!isatty (1))
+ SET_BINARY (1);
+#endif
- s = buffer;
- k = buffer + patlen - 1;
- for (;;)
+ if (optind < argc)
{
- /* The dreaded inner loop, revisited. */
- while (k < strend && (t = delta0[*k]))
+ status = 1;
+ do
{
- k += t;
- k += delta0[*k];
- k += delta0[*k];
+ char *file = argv[optind];
+ status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file,
+ &stats_base);
}
- while (k < buflim && delta0[*k])
- ++k;
- if (k == buflim)
- break;
-
- j = patlen - 1;
- s = k;
- while (--j >= 0 && cmap[*--s] == pattern[j])
- ;
- /*
- delta-less shortcut for literati, but
- short shrift for genetic engineers.
- */
- if (j >= 0)
- k++;
- else /* submatch */
- return ((char *)k);
+ while ( ++optind < argc);
}
- return(NULL);
-}
-
-int
-bmg_setup(pat, folded) /* compute "boyer-moore" delta table */
- char *pat;
- int folded;
-{ /* ... HAKMEM lives ... */
- int j;
-
- patlen = strlen(pat);
-
- if (folded) /* fold case while saving pattern */
- for (j = 0; j < patlen; j++)
- pattern[j] = (isupper((int) pat[j]) ?
- (char) tolower((int) pat[j]) : pat[j]);
else
- bcopy(pat, pattern, patlen);
+ status = grepfile ((char *) NULL, &stats_base);
- for (j = 0; j < 256; j++)
- {
- delta0[j] = patlen;
- cmap[j] = (char) j; /* could be done at compile time */
- }
- for (j = 0; j < patlen - 1; j++)
- delta0[pattern[j]] = patlen - j - 1;
- delta0[pattern[patlen - 1]] = 0;
+ if (fclose (stdout) == EOF)
+ error (_("writing output"), errno);
- if (folded)
- {
- for (j = 0; j < patlen - 1; j++)
- if (islower((int) pattern[j]))
- delta0[toupper((int) pattern[j])] = patlen - j - 1;
- if (islower((int) pattern[patlen - 1]))
- delta0[toupper((int) pattern[patlen - 1])] = 0;
- for (j = 'A'; j <= 'Z'; j++)
- cmap[j] = (char) tolower((int) j);
- }
+ exit (errseen ? 2 : status);
}
diff --git a/gnu/usr.bin/grep/grep.h b/gnu/usr.bin/grep/grep.h
index a3316c5..ebd0bbc 100644
--- a/gnu/usr.bin/grep/grep.h
+++ b/gnu/usr.bin/grep/grep.h
@@ -1,5 +1,5 @@
/* grep.h - interface to grep driver for searching subroutines.
- Copyright (C) 1992 Free Software Foundation, Inc.
+ Copyright (C) 1992, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,11 +13,16 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
-#if __STDC__
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 6) || __STRICT_ANSI__
+# define __attribute__(x)
+#endif
-extern void fatal(const char *, int);
+extern void fatal PARAMS ((const char *, int)) __attribute__((noreturn));
+extern char *xmalloc PARAMS ((size_t size));
+extern char *xrealloc PARAMS ((char *ptr, size_t size));
/* Grep.c expects the matchers vector to be terminated
by an entry with a NULL name, and to contain at least
@@ -26,25 +31,15 @@ extern void fatal(const char *, int);
extern struct matcher
{
char *name;
- void (*compile)(char *, size_t);
- char *(*execute)(char *, size_t, char **);
+ void (*compile) PARAMS ((char *, size_t));
+ char *(*execute) PARAMS ((char *, size_t, char **));
} matchers[];
-#else
-
-extern void fatal();
-
-extern struct matcher
-{
- char *name;
- void (*compile)();
- char *(*execute)();
-} matchers[];
-
-#endif
-
/* Exported from grep.c. */
-extern char *matcher;
+extern char const *matcher;
+
+/* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */
+extern char const default_matcher[];
/* The following flags are exported from grep for the matchers
to look at. */
diff --git a/gnu/usr.bin/grep/kwset.c b/gnu/usr.bin/grep/kwset.c
index 9b09071..c7b088b 100644
--- a/gnu/usr.bin/grep/kwset.c
+++ b/gnu/usr.bin/grep/kwset.c
@@ -1,10 +1,9 @@
/* kwset.c - search for any of a set of keywords.
- Copyright 1989 Free Software Foundation
- Written August 1989 by Mike Haertel.
+ Copyright (C) 1989, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
+ the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
@@ -14,8 +13,10 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+/* Written August 1989 by Mike Haertel.
The author may be reached (Email) at the address mike@ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation. */
@@ -27,43 +28,20 @@
String Matching: An Aid to Bibliographic Search," CACM June 1975,
Vol. 18, No. 6, which describes the failure function used below. */
-
-#ifdef STDC_HEADERS
-#include <limits.h>
-#include <stdlib.h>
-#else
-#define INT_MAX 2147483647
-#define UCHAR_MAX 255
-#ifdef __STDC__
-#include <stddef.h>
-#else
-#include <sys/types.h>
-#endif
-extern char *malloc();
-extern void free();
-#endif
-
-#ifdef HAVE_MEMCHR
-#include <string.h>
-#ifdef NEED_MEMORY_H
-#include <memory.h>
-#endif
-#else
-#ifdef __STDC__
-extern void *memchr();
-#else
-extern char *memchr();
-#endif
+#ifdef HAVE_CONFIG_H
+# include <config.h>
#endif
+#include <sys/types.h>
+#include "system.h"
+#include "kwset.h"
+#include "obstack.h"
#ifdef GREP
extern char *xmalloc();
-#define malloc xmalloc
+# undef malloc
+# define malloc xmalloc
#endif
-#include "kwset.h"
-#include "obstack.h"
-
#define NCHAR (UCHAR_MAX + 1)
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
@@ -106,6 +84,15 @@ struct kwset
char *trans; /* Character translation table. */
};
+/* prototypes */
+static void enqueue PARAMS((struct tree *, struct trie **));
+static void treefails PARAMS((register struct tree *, struct trie *, struct trie *));
+static void treedelta PARAMS((register struct tree *,register unsigned int, unsigned char *));
+static int hasevery PARAMS((register struct tree *, register struct tree *));
+static void treenext PARAMS((struct tree *, struct trie **));
+static char * bmexec PARAMS((kwset_t, char *, size_t));
+static char * cwexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
+
/* Allocate and initialize a keyword set object, returning an opaque
pointer to it. Return NULL if memory is not available. */
kwset_t
@@ -194,13 +181,13 @@ kwsincr(kws, text, len)
link = (struct tree *) obstack_alloc(&kwset->obstack,
sizeof (struct tree));
if (!link)
- return "memory exhausted";
+ return _("memory exhausted");
link->llink = 0;
link->rlink = 0;
link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
sizeof (struct trie));
if (!link->trie)
- return "memory exhausted";
+ return _("memory exhausted");
link->trie->accepting = 0;
link->trie->links = 0;
link->trie->parent = trie;
@@ -249,6 +236,8 @@ kwsincr(kws, text, len)
r->balance = t->balance != (char) -1 ? 0 : 1;
t->balance = 0;
break;
+ default:
+ abort ();
}
break;
case 2:
@@ -267,8 +256,12 @@ kwsincr(kws, text, len)
r->balance = t->balance != (char) -1 ? 0 : 1;
t->balance = 0;
break;
+ default:
+ abort ();
}
break;
+ default:
+ abort ();
}
if (dirs[depth - 1] == L)
@@ -591,7 +584,7 @@ bmexec(kws, text, size)
d = d1[U((tp += d)[-1])];
if (d != 0)
continue;
- if (tp[-2] == gc)
+ if (U(tp[-2]) == gc)
{
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
;
@@ -621,6 +614,10 @@ cwexec(kws, text, len, kwsmatch)
register struct tree *tree;
register char *trans;
+#ifdef lint
+ accept = NULL;
+#endif
+
/* Initialize register copies and look for easy ways out. */
kwset = (struct kwset *) kws;
if (len < kwset->mind)
@@ -758,7 +755,7 @@ cwexec(kws, text, len, kwsmatch)
}
return mch;
}
-
+
/* Search through the given text for a match of any member of the
given keyword set. Return a pointer to the first character of
the matching substring, or NULL if no match is found. If FOUNDLEN
diff --git a/gnu/usr.bin/grep/kwset.h b/gnu/usr.bin/grep/kwset.h
index 95f62e7..e699258 100644
--- a/gnu/usr.bin/grep/kwset.h
+++ b/gnu/usr.bin/grep/kwset.h
@@ -1,10 +1,9 @@
/* kwset.h - header declaring the keyword set library.
- Copyright 1989 Free Software Foundation
- Written August 1989 by Mike Haertel.
+ Copyright (C) 1989, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
+ the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
@@ -14,8 +13,10 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+/* Written August 1989 by Mike Haertel.
The author may be reached (Email) at the address mike@ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation. */
@@ -26,24 +27,22 @@ struct kwsmatch
size_t size[1]; /* Length of each submatch. */
};
-#if __STDC__
-
-typedef void *kwset_t;
+typedef ptr_t kwset_t;
/* Return an opaque pointer to a newly allocated keyword set, or NULL
if enough memory cannot be obtained. The argument if non-NULL
specifies a table of character translations to be applied to all
pattern and search text. */
-extern kwset_t kwsalloc(char *);
+extern kwset_t kwsalloc PARAMS((char *));
/* Incrementally extend the keyword set to include the given string.
Return NULL for success, or an error message. Remember an index
number for each keyword included in the set. */
-extern char *kwsincr(kwset_t, char *, size_t);
+extern char *kwsincr PARAMS((kwset_t, char *, size_t));
/* When the keyword set has been completely built, prepare it for
use. Return NULL for success, or an error message. */
-extern char *kwsprep(kwset_t);
+extern char *kwsprep PARAMS((kwset_t));
/* Search through the given buffer for a member of the keyword set.
Return a pointer to the leftmost longest match found, or NULL if
@@ -51,19 +50,8 @@ extern char *kwsprep(kwset_t);
the matching substring in the integer it points to. Similarly,
if foundindex is non-NULL, store the index of the particular
keyword found therein. */
-extern char *kwsexec(kwset_t, char *, size_t, struct kwsmatch *);
+extern char *kwsexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *));
/* Deallocate the given keyword set and all its associated storage. */
-extern void kwsfree(kwset_t);
-
-#else
-
-typedef char *kwset_t;
-
-extern kwset_t kwsalloc();
-extern char *kwsincr();
-extern char *kwsprep();
-extern char *kwsexec();
-extern void kwsfree();
+extern void kwsfree PARAMS((kwset_t));
-#endif
diff --git a/gnu/usr.bin/grep/obstack.c b/gnu/usr.bin/grep/obstack.c
index 7b9d3b9..4258c12 100644
--- a/gnu/usr.bin/grep/obstack.c
+++ b/gnu/usr.bin/grep/obstack.c
@@ -1,5 +1,10 @@
-/* obstack.c - subroutines used implicitly by object stack macros
- Copyright (C) 1988, 1993 Free Software Foundation, Inc.
+/* obstack.c - subroutines used implicitly by object stack macros
+ Copyright (C) 1988,89,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
+
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+NOTE: The canonical source of this file is maintained with the
+GNU C Library. Bugs can be reported to bug-glibc@prep.ai.mit.edu.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -12,26 +17,42 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+along with this program; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
#include "obstack.h"
-/* This is just to get __GNU_LIBRARY__ defined. */
-#include <stdio.h>
+/* NOTE BEFORE MODIFYING THIS FILE: This version number must be
+ incremented whenever callers compiled using an old obstack.h can no
+ longer properly call the functions in this obstack.c. */
+#define OBSTACK_INTERFACE_VERSION 1
/* Comment out all this code if we are using the GNU C Library, and are not
- actually compiling the library itself. This code is part of the GNU C
- Library, but also included in many other GNU distributions. Compiling
+ actually compiling the library itself, and the installed library
+ supports the same library interface we do. This code is part of the GNU
+ C Library, but also included in many other GNU distributions. Compiling
and linking in this code is a waste when using the GNU C library
(especially if it is a shared library). Rather than having every GNU
- program understand `configure --with-gnu-libc' and omit the object files,
- it is simpler to just do this in the source for each such file. */
+ program understand `configure --with-gnu-libc' and omit the object
+ files, it is simpler to just do this in the source for each such file. */
+
+#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */
+#if !defined (_LIBC) && defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ > 1
+#include <gnu-versions.h>
+#if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
-#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+#ifndef ELIDE_CODE
-#ifdef __STDC__
+
+#if defined (__STDC__) && __STDC__
#define POINTER void *
#else
#define POINTER char *
@@ -40,7 +61,7 @@ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Determine default alignment. */
struct fooalign {char x; double d;};
#define DEFAULT_ALIGNMENT \
- ((PTR_INT_TYPE) ((char *)&((struct fooalign *) 0)->d - (char *)0))
+ ((PTR_INT_TYPE) ((char *) &((struct fooalign *) 0)->d - (char *) 0))
/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
But in fact it might be less smart and round addresses to as much as
DEFAULT_ROUNDING. So we prepare for it to do that. */
@@ -55,6 +76,28 @@ union fooround {long x; double d;};
#define COPYING_UNIT int
#endif
+
+/* The functions allocating more room by calling `obstack_chunk_alloc'
+ jump to the handler pointed to by `obstack_alloc_failed_handler'.
+ This variable by default points to the internal function
+ `print_and_abort'. */
+#if defined (__STDC__) && __STDC__
+static void print_and_abort (void);
+void (*obstack_alloc_failed_handler) (void) = print_and_abort;
+#else
+static void print_and_abort ();
+void (*obstack_alloc_failed_handler) () = print_and_abort;
+#endif
+
+/* Exit value used when `print_and_abort' is used. */
+#if defined __GNU_LIBRARY__ || defined HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+int obstack_exit_failure = EXIT_FAILURE;
+
/* The non-GNU-C macros copy the obstack into this global variable
to avoid multiple evaluation. */
@@ -66,37 +109,61 @@ struct obstack *_obstack;
For free, do not use ?:, since some compilers, like the MIPS compilers,
do not allow (expr) ? void : void. */
+#if defined (__STDC__) && __STDC__
+#define CALL_CHUNKFUN(h, size) \
+ (((h) -> use_extra_arg) \
+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
+ : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size)))
+
+#define CALL_FREEFUN(h, old_chunk) \
+ do { \
+ if ((h) -> use_extra_arg) \
+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
+ else \
+ (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \
+ } while (0)
+#else
#define CALL_CHUNKFUN(h, size) \
(((h) -> use_extra_arg) \
? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
- : (*(h)->chunkfun) ((size)))
+ : (*(struct _obstack_chunk *(*) ()) (h)->chunkfun) ((size)))
#define CALL_FREEFUN(h, old_chunk) \
do { \
if ((h) -> use_extra_arg) \
(*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
else \
- (*(h)->freefun) ((old_chunk)); \
+ (*(void (*) ()) (h)->freefun) ((old_chunk)); \
} while (0)
+#endif
/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
Objects start on multiples of ALIGNMENT (0 means use default).
CHUNKFUN is the function to use to allocate chunks,
- and FREEFUN the function to free them. */
+ and FREEFUN the function to free them.
-void
+ Return nonzero if successful, zero if out of memory.
+ To recover from an out of memory error,
+ free up some memory, then call this again. */
+
+int
_obstack_begin (h, size, alignment, chunkfun, freefun)
struct obstack *h;
int size;
int alignment;
+#if defined (__STDC__) && __STDC__
+ POINTER (*chunkfun) (long);
+ void (*freefun) (void *);
+#else
POINTER (*chunkfun) ();
void (*freefun) ();
+#endif
{
- register struct _obstack_chunk* chunk; /* points to new chunk */
+ register struct _obstack_chunk *chunk; /* points to new chunk */
if (alignment == 0)
- alignment = DEFAULT_ALIGNMENT;
+ alignment = (int) DEFAULT_ALIGNMENT;
if (size == 0)
/* Default size is what GNU malloc can fit in a 4096-byte block. */
{
@@ -114,34 +181,48 @@ _obstack_begin (h, size, alignment, chunkfun, freefun)
size = 4096 - extra;
}
+#if defined (__STDC__) && __STDC__
+ h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;
+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+#else
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
h->freefun = freefun;
+#endif
h->chunk_size = size;
h->alignment_mask = alignment - 1;
h->use_extra_arg = 0;
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ (*obstack_alloc_failed_handler) ();
h->next_free = h->object_base = chunk->contents;
h->chunk_limit = chunk->limit
= (char *) chunk + h->chunk_size;
chunk->prev = 0;
/* The initial chunk now contains no empty object. */
h->maybe_empty_object = 0;
+ h->alloc_failed = 0;
+ return 1;
}
-void
+int
_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
struct obstack *h;
int size;
int alignment;
+#if defined (__STDC__) && __STDC__
+ POINTER (*chunkfun) (POINTER, long);
+ void (*freefun) (POINTER, POINTER);
+#else
POINTER (*chunkfun) ();
void (*freefun) ();
+#endif
POINTER arg;
{
- register struct _obstack_chunk* chunk; /* points to new chunk */
+ register struct _obstack_chunk *chunk; /* points to new chunk */
if (alignment == 0)
- alignment = DEFAULT_ALIGNMENT;
+ alignment = (int) DEFAULT_ALIGNMENT;
if (size == 0)
/* Default size is what GNU malloc can fit in a 4096-byte block. */
{
@@ -159,20 +240,29 @@ _obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
size = 4096 - extra;
}
+#if defined(__STDC__) && __STDC__
+ h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun;
+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+#else
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
h->freefun = freefun;
+#endif
h->chunk_size = size;
h->alignment_mask = alignment - 1;
h->extra_arg = arg;
h->use_extra_arg = 1;
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ (*obstack_alloc_failed_handler) ();
h->next_free = h->object_base = chunk->contents;
h->chunk_limit = chunk->limit
= (char *) chunk + h->chunk_size;
chunk->prev = 0;
/* The initial chunk now contains no empty object. */
h->maybe_empty_object = 0;
+ h->alloc_failed = 0;
+ return 1;
}
/* Allocate a new current chunk for the obstack *H
@@ -186,12 +276,12 @@ _obstack_newchunk (h, length)
struct obstack *h;
int length;
{
- register struct _obstack_chunk* old_chunk = h->chunk;
- register struct _obstack_chunk* new_chunk;
+ register struct _obstack_chunk *old_chunk = h->chunk;
+ register struct _obstack_chunk *new_chunk;
register long new_size;
- register int obj_size = h->next_free - h->object_base;
- register int i;
- int already;
+ register long obj_size = h->next_free - h->object_base;
+ register long i;
+ long already;
/* Compute size for new chunk. */
new_size = (obj_size + length) + (obj_size >> 3) + 100;
@@ -199,7 +289,10 @@ _obstack_newchunk (h, length)
new_size = h->chunk_size;
/* Allocate and initialize the new chunk. */
- new_chunk = h->chunk = CALL_CHUNKFUN (h, new_size);
+ new_chunk = CALL_CHUNKFUN (h, new_size);
+ if (!new_chunk)
+ (*obstack_alloc_failed_handler) ();
+ h->chunk = new_chunk;
new_chunk->prev = old_chunk;
new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
@@ -242,19 +335,25 @@ _obstack_newchunk (h, length)
This is here for debugging.
If you use it in a program, you are probably losing. */
+#if defined (__STDC__) && __STDC__
+/* Suppress -Wmissing-prototypes warning. We don't want to declare this in
+ obstack.h because it is just for debugging. */
+int _obstack_allocated_p (struct obstack *h, POINTER obj);
+#endif
+
int
_obstack_allocated_p (h, obj)
struct obstack *h;
POINTER obj;
{
- register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
- register struct _obstack_chunk* plp; /* point to previous chunk if any */
+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
lp = (h)->chunk;
/* We use >= rather than > since the object cannot be exactly at
the beginning of the chunk but might be an empty object exactly
- at the end of an adjacent chunk. */
- while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
+ at the end of an adjacent chunk. */
+ while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
{
plp = lp->prev;
lp = plp;
@@ -275,14 +374,14 @@ _obstack_free (h, obj)
struct obstack *h;
POINTER obj;
{
- register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
- register struct _obstack_chunk* plp; /* point to previous chunk if any */
+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
lp = h->chunk;
/* We use >= because there cannot be an object at the beginning of a chunk.
But there can be an empty object at that address
at the end of another chunk. */
- while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
+ while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
{
plp = lp->prev;
CALL_FREEFUN (h, lp);
@@ -293,7 +392,7 @@ _obstack_free (h, obj)
}
if (lp)
{
- h->object_base = h->next_free = (char *)(obj);
+ h->object_base = h->next_free = (char *) (obj);
h->chunk_limit = lp->limit;
h->chunk = lp;
}
@@ -309,14 +408,14 @@ obstack_free (h, obj)
struct obstack *h;
POINTER obj;
{
- register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
- register struct _obstack_chunk* plp; /* point to previous chunk if any */
+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
lp = h->chunk;
/* We use >= because there cannot be an object at the beginning of a chunk.
But there can be an empty object at that address
at the end of another chunk. */
- while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
+ while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj))
{
plp = lp->prev;
CALL_FREEFUN (h, lp);
@@ -327,7 +426,7 @@ obstack_free (h, obj)
}
if (lp)
{
- h->object_base = h->next_free = (char *)(obj);
+ h->object_base = h->next_free = (char *) (obj);
h->chunk_limit = lp->limit;
h->chunk = lp;
}
@@ -336,6 +435,39 @@ obstack_free (h, obj)
abort ();
}
+int
+_obstack_memory_used (h)
+ struct obstack *h;
+{
+ register struct _obstack_chunk* lp;
+ register int nbytes = 0;
+
+ for (lp = h->chunk; lp != 0; lp = lp->prev)
+ {
+ nbytes += lp->limit - (char *) lp;
+ }
+ return nbytes;
+}
+
+/* Define the error handler. */
+#ifndef _
+# ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# ifndef _
+# define _(Str) gettext (Str)
+# endif
+# else
+# define _(Str) (Str)
+# endif
+#endif
+
+static void
+print_and_abort ()
+{
+ fputs (_("memory exhausted\n"), stderr);
+ exit (obstack_exit_failure);
+}
+
#if 0
/* These are now turned off because the applications do not use it
and it uses bcopy via obstack_grow, which causes trouble on sysV. */
@@ -343,7 +475,7 @@ obstack_free (h, obj)
/* Now define the functional versions of the obstack macros.
Define them to simply use the corresponding macros to do the job. */
-#ifdef __STDC__
+#if defined (__STDC__) && __STDC__
/* These function definitions do not work with non-ANSI preprocessors;
they won't pass through the macro names in parentheses. */
@@ -374,6 +506,13 @@ int (obstack_room) (obstack)
return obstack_room (obstack);
}
+int (obstack_make_room) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ return obstack_make_room (obstack, length);
+}
+
void (obstack_grow) (obstack, pointer, length)
struct obstack *obstack;
POINTER pointer;
@@ -451,4 +590,4 @@ POINTER (obstack_copy0) (obstack, pointer, length)
#endif /* 0 */
-#endif /* _LIBC or not __GNU_LIBRARY__. */
+#endif /* !ELIDE_CODE */
diff --git a/gnu/usr.bin/grep/obstack.h b/gnu/usr.bin/grep/obstack.h
index 8a18e45..5c03f68 100644
--- a/gnu/usr.bin/grep/obstack.h
+++ b/gnu/usr.bin/grep/obstack.h
@@ -1,5 +1,10 @@
/* obstack.h - object stack macros
- Copyright (C) 1988, 1992 Free Software Foundation, Inc.
+ Copyright (C) 1988,89,90,91,92,93,94,96,97, 98 Free Software Foundation, Inc.
+
+ The master source of this file is not part of the C library; it lives in /gd/gnu/lib.
+
+NOTE: The canonical source of this file is maintained with the
+GNU C Library. Bugs can be reported to bug-glibc@prep.ai.mit.edu.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -12,8 +17,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+along with this program; if not, write to the Free Software Foundation,
+Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Summary:
@@ -100,47 +105,59 @@ Summary:
/* Don't do the contents of this file more than once. */
-#ifndef __OBSTACKS__
-#define __OBSTACKS__
+#ifndef _OBSTACK_H
+#define _OBSTACK_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
-/* We use subtraction of (char *)0 instead of casting to int
+/* We use subtraction of (char *) 0 instead of casting to int
because on word-addressable machines a simple cast to int
may ignore the byte-within-word field of the pointer. */
#ifndef __PTR_TO_INT
-#define __PTR_TO_INT(P) ((P) - (char *)0)
+#define __PTR_TO_INT(P) ((P) - (char *) 0)
#endif
#ifndef __INT_TO_PTR
-#define __INT_TO_PTR(P) ((P) + (char *)0)
+#define __INT_TO_PTR(P) ((P) + (char *) 0)
#endif
/* We need the type of the resulting object. In ANSI C it is ptrdiff_t
but in traditional C it is usually long. If we are in ANSI C and
don't already have ptrdiff_t get it. */
-#if defined (__STDC__) && ! defined (offsetof)
+#if defined (__STDC__) && __STDC__ && ! defined (offsetof)
#if defined (__GNUC__) && defined (IN_GCC)
/* On Next machine, the system's stddef.h screws up if included
- after we have defined just ptrdiff_t, so include all of gstddef.h.
+ after we have defined just ptrdiff_t, so include all of stddef.h.
Otherwise, define just ptrdiff_t, which is all we need. */
#ifndef __NeXT__
#define __need_ptrdiff_t
#endif
+#endif
-/* While building GCC, the stddef.h that goes with GCC has this name. */
-#include "gstddef.h"
-#else
#include <stddef.h>
#endif
-#endif
-#ifdef __STDC__
+#if defined (__STDC__) && __STDC__
#define PTR_INT_TYPE ptrdiff_t
#else
#define PTR_INT_TYPE long
#endif
+#if defined (_LIBC) || defined (HAVE_STRING_H)
+#include <string.h>
+#define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
+#else
+#ifdef memcpy
+#define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
+#else
+#define _obstack_memcpy(To, From, N) bcopy ((From), (To), (N))
+#endif
+#endif
+
struct _obstack_chunk /* Lives at front of each chunk. */
{
char *limit; /* 1 past end of this chunk */
@@ -151,39 +168,54 @@ struct _obstack_chunk /* Lives at front of each chunk. */
struct obstack /* control current object in current chunk */
{
long chunk_size; /* preferred size to allocate chunks in */
- struct _obstack_chunk* chunk; /* address of current struct obstack_chunk */
+ struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */
char *object_base; /* address of object we are building */
char *next_free; /* where to add next char to current object */
char *chunk_limit; /* address of char after current chunk */
PTR_INT_TYPE temp; /* Temporary for some macros. */
int alignment_mask; /* Mask of alignment for each object. */
+#if defined (__STDC__) && __STDC__
+ /* These prototypes vary based on `use_extra_arg', and we use
+ casts to the prototypeless function type in all assignments,
+ but having prototypes here quiets -Wstrict-prototypes. */
+ struct _obstack_chunk *(*chunkfun) (void *, long);
+ void (*freefun) (void *, struct _obstack_chunk *);
+ void *extra_arg; /* first arg for chunk alloc/dealloc funcs */
+#else
struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */
void (*freefun) (); /* User's function to free a chunk. */
char *extra_arg; /* first arg for chunk alloc/dealloc funcs */
+#endif
unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
unsigned maybe_empty_object:1;/* There is a possibility that the current
chunk contains a zero-length object. This
prevents freeing the chunk if we allocate
a bigger chunk to replace it. */
+ unsigned alloc_failed:1; /* No longer used, as we now call the failed
+ handler on error, but retained for binary
+ compatibility. */
};
/* Declare the external functions we use; they are in obstack.c. */
-#ifdef __STDC__
+#if defined (__STDC__) && __STDC__
extern void _obstack_newchunk (struct obstack *, int);
extern void _obstack_free (struct obstack *, void *);
-extern void _obstack_begin (struct obstack *, int, int,
- void *(*) (), void (*) ());
-extern void _obstack_begin_1 (struct obstack *, int, int,
- void *(*) (), void (*) (), void *);
+extern int _obstack_begin (struct obstack *, int, int,
+ void *(*) (long), void (*) (void *));
+extern int _obstack_begin_1 (struct obstack *, int, int,
+ void *(*) (void *, long),
+ void (*) (void *, void *), void *);
+extern int _obstack_memory_used (struct obstack *);
#else
extern void _obstack_newchunk ();
extern void _obstack_free ();
-extern void _obstack_begin ();
-extern void _obstack_begin_1 ();
+extern int _obstack_begin ();
+extern int _obstack_begin_1 ();
+extern int _obstack_memory_used ();
#endif
-#ifdef __STDC__
+#if defined (__STDC__) && __STDC__
/* Do the function-declarations after the structs
but before defining the macros. */
@@ -211,6 +243,7 @@ void * obstack_finish (struct obstack *obstack);
int obstack_object_size (struct obstack *obstack);
int obstack_room (struct obstack *obstack);
+void obstack_make_room (struct obstack *obstack, int size);
void obstack_1grow_fast (struct obstack *obstack, int data_char);
void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
void obstack_int_grow_fast (struct obstack *obstack, int data);
@@ -220,11 +253,24 @@ void * obstack_base (struct obstack *obstack);
void * obstack_next_free (struct obstack *obstack);
int obstack_alignment_mask (struct obstack *obstack);
int obstack_chunk_size (struct obstack *obstack);
+int obstack_memory_used (struct obstack *obstack);
#endif /* __STDC__ */
/* Non-ANSI C cannot really support alternative functions for these macros,
so we do not declare them. */
+
+/* Error handler called when `obstack_chunk_alloc' failed to allocate
+ more memory. This can be set to a user defined function. The
+ default action is to print a message and abort. */
+#if defined (__STDC__) && __STDC__
+extern void (*obstack_alloc_failed_handler) (void);
+#else
+extern void (*obstack_alloc_failed_handler) ();
+#endif
+
+/* Exit value used when `print_and_abort' is used. */
+extern int obstack_exit_failure;
/* Pointer to beginning of object being allocated or to be allocated next.
Note that this might not be the final address of the object
@@ -244,6 +290,35 @@ int obstack_chunk_size (struct obstack *obstack);
#define obstack_alignment_mask(h) ((h)->alignment_mask)
+/* To prevent prototype warnings provide complete argument list in
+ standard C version. */
+#if defined (__STDC__) && __STDC__
+
+#define obstack_init(h) \
+ _obstack_begin ((h), 0, 0, \
+ (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_begin(h, size) \
+ _obstack_begin ((h), (size), 0, \
+ (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
+ _obstack_begin ((h), (size), (alignment), \
+ (void *(*) (long)) (chunkfun), (void (*) (void *)) (freefun))
+
+#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
+ _obstack_begin_1 ((h), (size), (alignment), \
+ (void *(*) (void *, long)) (chunkfun), \
+ (void (*) (void *, void *)) (freefun), (arg))
+
+#define obstack_chunkfun(h, newchunkfun) \
+ ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun))
+
+#define obstack_freefun(h, newfreefun) \
+ ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun))
+
+#else
+
#define obstack_init(h) \
_obstack_begin ((h), 0, 0, \
(void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
@@ -260,12 +335,25 @@ int obstack_chunk_size (struct obstack *obstack);
_obstack_begin_1 ((h), (size), (alignment), \
(void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
+#define obstack_chunkfun(h, newchunkfun) \
+ ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun))
+
+#define obstack_freefun(h, newfreefun) \
+ ((h) -> freefun = (void (*)()) (newfreefun))
+
+#endif
+
#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar)
#define obstack_blank_fast(h,n) ((h)->next_free += (n))
+
+#define obstack_memory_used(h) _obstack_memory_used (h)
-#if defined (__GNUC__) && defined (__STDC__)
-#if __GNUC__ < 2 || defined(NeXT)
+#if defined (__GNUC__) && defined (__STDC__) && __STDC__
+/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and
+ does not implement __extension__. But that compiler doesn't define
+ __GNUC_MINOR__. */
+#if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__)
#define __extension__
#endif
@@ -284,18 +372,21 @@ int obstack_chunk_size (struct obstack *obstack);
({ struct obstack *__o = (OBSTACK); \
(unsigned) (__o->chunk_limit - __o->next_free); })
-/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
- so that we can avoid having void expressions
- in the arms of the conditional expression.
- Casting the third operand to void was tried before,
- but some compilers won't accept it. */
+#define obstack_make_room(OBSTACK,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len) \
+ _obstack_newchunk (__o, __len); \
+ (void) 0; })
+
#define obstack_grow(OBSTACK,where,length) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
int __len = (length); \
- ((__o->next_free + __len > __o->chunk_limit) \
- ? (_obstack_newchunk (__o, __len), 0) : 0); \
- bcopy (where, __o->next_free, __len); \
+ if (__o->next_free + __len > __o->chunk_limit) \
+ _obstack_newchunk (__o, __len); \
+ _obstack_memcpy (__o->next_free, (char *) (where), __len); \
__o->next_free += __len; \
(void) 0; })
@@ -303,50 +394,50 @@ __extension__ \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
int __len = (length); \
- ((__o->next_free + __len + 1 > __o->chunk_limit) \
- ? (_obstack_newchunk (__o, __len + 1), 0) : 0), \
- bcopy (where, __o->next_free, __len), \
- __o->next_free += __len, \
+ if (__o->next_free + __len + 1 > __o->chunk_limit) \
+ _obstack_newchunk (__o, __len + 1); \
+ _obstack_memcpy (__o->next_free, (char *) (where), __len); \
+ __o->next_free += __len; \
*(__o->next_free)++ = 0; \
(void) 0; })
#define obstack_1grow(OBSTACK,datum) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
- ((__o->next_free + 1 > __o->chunk_limit) \
- ? (_obstack_newchunk (__o, 1), 0) : 0), \
+ if (__o->next_free + 1 > __o->chunk_limit) \
+ _obstack_newchunk (__o, 1); \
*(__o->next_free)++ = (datum); \
(void) 0; })
/* These assume that the obstack alignment is good enough for pointers or ints,
and that the data added so far to the current object
shares that much alignment. */
-
+
#define obstack_ptr_grow(OBSTACK,datum) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
- ((__o->next_free + sizeof (void *) > __o->chunk_limit) \
- ? (_obstack_newchunk (__o, sizeof (void *)), 0) : 0), \
+ if (__o->next_free + sizeof (void *) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (void *)); \
*((void **)__o->next_free)++ = ((void *)datum); \
(void) 0; })
#define obstack_int_grow(OBSTACK,datum) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
- ((__o->next_free + sizeof (int) > __o->chunk_limit) \
- ? (_obstack_newchunk (__o, sizeof (int)), 0) : 0), \
+ if (__o->next_free + sizeof (int) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (int)); \
*((int *)__o->next_free)++ = ((int)datum); \
(void) 0; })
-#define obstack_ptr_grow_fast(h,aptr) (*((void **)(h)->next_free)++ = (void *)aptr)
-#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
+#define obstack_ptr_grow_fast(h,aptr) (*((void **) (h)->next_free)++ = (void *)aptr)
+#define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint)
#define obstack_blank(OBSTACK,length) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
int __len = (length); \
- ((__o->chunk_limit - __o->next_free < __len) \
- ? (_obstack_newchunk (__o, __len), 0) : 0); \
+ if (__o->chunk_limit - __o->next_free < __len) \
+ _obstack_newchunk (__o, __len); \
__o->next_free += __len; \
(void) 0; })
@@ -373,15 +464,16 @@ __extension__ \
#define obstack_finish(OBSTACK) \
__extension__ \
({ struct obstack *__o1 = (OBSTACK); \
- void *value = (void *) __o1->object_base; \
+ void *value; \
+ value = (void *) __o1->object_base; \
if (__o1->next_free == value) \
__o1->maybe_empty_object = 1; \
__o1->next_free \
= __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
& ~ (__o1->alignment_mask)); \
- ((__o1->next_free - (char *)__o1->chunk \
- > __o1->chunk_limit - (char *)__o1->chunk) \
- ? (__o1->next_free = __o1->chunk_limit) : 0); \
+ if (__o1->next_free - (char *)__o1->chunk \
+ > __o1->chunk_limit - (char *)__o1->chunk) \
+ __o1->next_free = __o1->chunk_limit; \
__o1->object_base = __o1->next_free; \
value; })
@@ -401,44 +493,55 @@ __extension__ \
#define obstack_room(h) \
(unsigned) ((h)->chunk_limit - (h)->next_free)
+/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
+ so that we can avoid having void expressions
+ in the arms of the conditional expression.
+ Casting the third operand to void was tried before,
+ but some compilers won't accept it. */
+
+#define obstack_make_room(h,length) \
+( (h)->temp = (length), \
+ (((h)->next_free + (h)->temp > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0))
+
#define obstack_grow(h,where,length) \
( (h)->temp = (length), \
(((h)->next_free + (h)->temp > (h)->chunk_limit) \
? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
- bcopy (where, (h)->next_free, (h)->temp), \
+ _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \
(h)->next_free += (h)->temp)
#define obstack_grow0(h,where,length) \
( (h)->temp = (length), \
(((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \
? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \
- bcopy (where, (h)->next_free, (h)->temp), \
+ _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \
(h)->next_free += (h)->temp, \
*((h)->next_free)++ = 0)
#define obstack_1grow(h,datum) \
( (((h)->next_free + 1 > (h)->chunk_limit) \
? (_obstack_newchunk ((h), 1), 0) : 0), \
- *((h)->next_free)++ = (datum))
+ (*((h)->next_free)++ = (datum)))
#define obstack_ptr_grow(h,datum) \
( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
- *((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *)datum))
+ (*((char **) (((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *) datum)))
#define obstack_int_grow(h,datum) \
( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
- *((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) = ((int)datum))
+ (*((int *) (((h)->next_free+=sizeof(int))-sizeof(int))) = ((int) datum)))
-#define obstack_ptr_grow_fast(h,aptr) (*((char **)(h)->next_free)++ = (char *)aptr)
-#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
+#define obstack_ptr_grow_fast(h,aptr) (*((char **) (h)->next_free)++ = (char *) aptr)
+#define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint)
#define obstack_blank(h,length) \
( (h)->temp = (length), \
(((h)->chunk_limit - (h)->next_free < (h)->temp) \
? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
- (h)->next_free += (h)->temp)
+ ((h)->next_free += (h)->temp))
#define obstack_alloc(h,length) \
(obstack_blank ((h), (length)), obstack_finish ((h)))
@@ -457,22 +560,22 @@ __extension__ \
(h)->next_free \
= __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \
& ~ ((h)->alignment_mask)), \
- (((h)->next_free - (char *)(h)->chunk \
- > (h)->chunk_limit - (char *)(h)->chunk) \
+ (((h)->next_free - (char *) (h)->chunk \
+ > (h)->chunk_limit - (char *) (h)->chunk) \
? ((h)->next_free = (h)->chunk_limit) : 0), \
(h)->object_base = (h)->next_free, \
__INT_TO_PTR ((h)->temp))
-#ifdef __STDC__
+#if defined (__STDC__) && __STDC__
#define obstack_free(h,obj) \
-( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
+( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \
(((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
? (int) ((h)->next_free = (h)->object_base \
= (h)->temp + (char *) (h)->chunk) \
: (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
#else
#define obstack_free(h,obj) \
-( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
+( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \
(((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
? (int) ((h)->next_free = (h)->object_base \
= (h)->temp + (char *) (h)->chunk) \
@@ -481,4 +584,8 @@ __extension__ \
#endif /* not __GNUC__ or not __STDC__ */
-#endif /* not __OBSTACKS__ */
+#ifdef __cplusplus
+} /* C++ */
+#endif
+
+#endif /* obstack.h */
diff --git a/gnu/usr.bin/grep/savedir.c b/gnu/usr.bin/grep/savedir.c
new file mode 100644
index 0000000..5a9c339
--- /dev/null
+++ b/gnu/usr.bin/grep/savedir.c
@@ -0,0 +1,135 @@
+/* savedir.c -- save the list of files in a directory in a string
+ Copyright (C) 1990, 1997, 1998 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+
+#ifdef CLOSEDIR_VOID
+/* Fake a return value. */
+# define CLOSEDIR(d) (closedir (d), 0)
+#else
+# define CLOSEDIR(d) closedir (d)
+#endif
+
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <string.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+#ifndef NULL
+# define NULL 0
+#endif
+
+#ifndef stpcpy
+char *stpcpy ();
+#endif
+
+#include "savedir.h"
+
+/* Return a freshly allocated string containing the filenames
+ in directory DIR, separated by '\0' characters;
+ the end is marked by two '\0' characters in a row.
+ NAME_SIZE is the number of bytes to initially allocate
+ for the string; it will be enlarged as needed.
+ Return NULL if DIR cannot be opened or if out of memory. */
+
+char *
+savedir (dir, name_size)
+ const char *dir;
+ unsigned int name_size;
+{
+ DIR *dirp;
+ struct dirent *dp;
+ char *name_space;
+ char *namep;
+
+ dirp = opendir (dir);
+ if (dirp == NULL)
+ return NULL;
+
+ name_space = (char *) malloc (name_size);
+ if (name_space == NULL)
+ {
+ closedir (dirp);
+ return NULL;
+ }
+ namep = name_space;
+
+ while ((dp = readdir (dirp)) != NULL)
+ {
+ /* Skip "." and ".." (some NFS filesystems' directories lack them). */
+ if (dp->d_name[0] != '.'
+ || (dp->d_name[1] != '\0'
+ && (dp->d_name[1] != '.' || dp->d_name[2] != '\0')))
+ {
+ unsigned size_needed = (namep - name_space) + NAMLEN (dp) + 2;
+
+ if (size_needed > name_size)
+ {
+ char *new_name_space;
+
+ while (size_needed > name_size)
+ name_size += 1024;
+
+ new_name_space = realloc (name_space, name_size);
+ if (new_name_space == NULL)
+ {
+ closedir (dirp);
+ return NULL;
+ }
+ namep += new_name_space - name_space;
+ name_space = new_name_space;
+ }
+ namep = stpcpy (namep, dp->d_name) + 1;
+ }
+ }
+ *namep = '\0';
+ if (CLOSEDIR (dirp))
+ {
+ free (name_space);
+ return NULL;
+ }
+ return name_space;
+}
diff --git a/gnu/usr.bin/grep/savedir.h b/gnu/usr.bin/grep/savedir.h
new file mode 100644
index 0000000..033e567
--- /dev/null
+++ b/gnu/usr.bin/grep/savedir.h
@@ -0,0 +1,15 @@
+#if !defined SAVEDIR_H_
+# define SAVEDIR_H_
+
+# ifndef PARAMS
+# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+# endif
+
+char *
+savedir PARAMS ((const char *dir, unsigned int name_size));
+
+#endif
diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c
index d2be489..cf51e3b 100644
--- a/gnu/usr.bin/grep/search.c
+++ b/gnu/usr.bin/grep/search.c
@@ -1,5 +1,5 @@
/* search.c - searching subroutines using dfa, kwset and regex for grep.
- Copyright (C) 1992 Free Software Foundation, Inc.
+ Copyright (C) 1992, 1998 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,80 +13,38 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
- Written August 1992 by Mike Haertel. */
+/* Written August 1992 by Mike Haertel. */
-#include <ctype.h>
-
-#ifdef STDC_HEADERS
-#include <limits.h>
-#include <stdlib.h>
-#else
-#define UCHAR_MAX 255
-#include <sys/types.h>
-extern char *malloc();
-#endif
-
-#ifdef HAVE_MEMCHR
-#include <string.h>
-#ifdef NEED_MEMORY_H
-#include <memory.h>
+#ifdef HAVE_CONFIG_H
+# include <config.h>
#endif
-#else
-#ifdef __STDC__
-extern void *memchr();
-#else
-extern char *memchr();
-#endif
-#endif
-
-#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
-#undef bcopy
-#define bcopy(s, d, n) memcpy((d), (s), (n))
-#endif
-
-#ifdef isascii
-#define ISALNUM(C) (isascii(C) && isalnum(C))
-#define ISUPPER(C) (isascii(C) && isupper(C))
-#else
-#define ISALNUM(C) isalnum(C)
-#define ISUPPER(C) isupper(C)
-#endif
-
-#define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C))
-
+#include <sys/types.h>
+#include "system.h"
#include "grep.h"
+#include "regex.h"
#include "dfa.h"
#include "kwset.h"
-#include "regex.h"
#define NCHAR (UCHAR_MAX + 1)
-#if __STDC__
-static void Gcompile(char *, size_t);
-static void Ecompile(char *, size_t);
-static char *EGexecute(char *, size_t, char **);
-static void Fcompile(char *, size_t);
-static char *Fexecute(char *, size_t, char **);
-#else
-static void Gcompile();
-static void Ecompile();
-static char *EGexecute();
-static void Fcompile();
-static char *Fexecute();
-#endif
+static void Gcompile PARAMS((char *, size_t));
+static void Ecompile PARAMS((char *, size_t));
+static char *EGexecute PARAMS((char *, size_t, char **));
+static void Fcompile PARAMS((char *, size_t));
+static char *Fexecute PARAMS((char *, size_t, char **));
+static void kwsinit PARAMS((void));
/* Here is the matchers vector for the main program. */
struct matcher matchers[] = {
{ "default", Gcompile, EGexecute },
{ "grep", Gcompile, EGexecute },
- { "ggrep", Gcompile, EGexecute },
{ "egrep", Ecompile, EGexecute },
{ "posix-egrep", Ecompile, EGexecute },
- { "gegrep", Ecompile, EGexecute },
+ { "awk", Ecompile, EGexecute },
{ "fgrep", Fcompile, Fexecute },
- { "gfgrep", Fcompile, Fexecute },
{ 0, 0, 0 },
};
@@ -111,7 +69,7 @@ static int lastexact;
void
dfaerror(mesg)
- char *mesg;
+ const char *mesg;
{
fatal(mesg, 0);
}
@@ -128,7 +86,7 @@ kwsinit()
if (!(kwset = kwsalloc(match_icase ? trans : (char *) 0)))
fatal("memory exhausted", 0);
-}
+}
/* If the DFA turns out to have some set of fixed strings one of
which must occur in the match, then we build a kwset matcher
@@ -173,10 +131,7 @@ Gcompile(pattern, size)
char *pattern;
size_t size;
{
-#ifdef __STDC__
- const
-#endif
- char *err;
+ const char *err;
re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
@@ -184,8 +139,6 @@ Gcompile(pattern, size)
if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
fatal(err, 0);
- dfainit(&dfa);
-
/* In the match_words and match_lines cases, we use a different pattern
for the DFA matcher that will quickly throw out cases that won't work.
Then if DFA succeeds we do some hairy stuff using the regex matcher
@@ -209,7 +162,7 @@ Gcompile(pattern, size)
strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
i = strlen(n);
- bcopy(pattern, n + i, size);
+ memcpy(n + i, pattern, size);
i += size;
if (match_words)
@@ -231,16 +184,18 @@ Ecompile(pattern, size)
char *pattern;
size_t size;
{
-#ifdef __STDC__
- const
-#endif
- char *err;
+ const char *err;
if (strcmp(matcher, "posix-egrep") == 0)
{
re_set_syntax(RE_SYNTAX_POSIX_EGREP);
dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
}
+ else if (strcmp(matcher, "awk") == 0)
+ {
+ re_set_syntax(RE_SYNTAX_AWK);
+ dfasyntax(RE_SYNTAX_AWK, match_icase);
+ }
else
{
re_set_syntax(RE_SYNTAX_EGREP);
@@ -250,8 +205,6 @@ Ecompile(pattern, size)
if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
fatal(err, 0);
- dfainit(&dfa);
-
/* In the match_words and match_lines cases, we use a different pattern
for the DFA matcher that will quickly throw out cases that won't work.
Then if DFA succeeds we do some hairy stuff using the regex matcher
@@ -275,7 +228,7 @@ Ecompile(pattern, size)
strcpy(n, "(^|[^0-9A-Za-z_])(");
i = strlen(n);
- bcopy(pattern, n + i, size);
+ memcpy(n + i, pattern, size);
i += size;
if (match_words)
@@ -358,7 +311,8 @@ EGexecute(buf, size, endp)
if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0)
{
len = regs.end[0] - start;
- if (!match_lines && !match_words || match_lines && len == end - beg)
+ if ((!match_lines && !match_words)
+ || (match_lines && len == end - beg))
goto success;
/* If -w, check if the match aligns with word boundaries.
We do this iteratively because:
@@ -369,8 +323,9 @@ EGexecute(buf, size, endp)
if (match_words)
while (start >= 0)
{
- if ((start == 0 || !WCHAR(beg[start - 1]))
- && (len == end - beg || !WCHAR(beg[start + len])))
+ if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+ && (len == end - beg
+ || !WCHAR ((unsigned char) beg[start + len])))
goto success;
if (len > 0)
{
diff --git a/gnu/usr.bin/grep/stpcpy.c b/gnu/usr.bin/grep/stpcpy.c
new file mode 100644
index 0000000..380e65f
--- /dev/null
+++ b/gnu/usr.bin/grep/stpcpy.c
@@ -0,0 +1,52 @@
+/* stpcpy.c -- copy a string and return pointer to end of new string
+ Copyright (C) 1992, 1995, 1997, 1998 Free Software Foundation, Inc.
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#undef __stpcpy
+#undef stpcpy
+
+#ifndef weak_alias /* without weak_alias, __stpcpy is just another spelling of stpcpy, so the function below is defined as stpcpy */
+# define __stpcpy stpcpy
+#endif
+
+/* Copy SRC to DEST, terminating '\0' included, and return the address of that terminating '\0' in DEST. */
+char *
+__stpcpy (dest, src)
+ char *dest;
+ const char *src;
+{
+ register char *d = dest;
+ register const char *s = src;
+
+ do
+ *d++ = *s; /* copy first, test afterwards, so the '\0' itself is copied too */
+ while (*s++ != '\0');
+
+ return d - 1; /* d was advanced past the copied '\0'; step back onto it */
+}
+#ifdef weak_alias
+weak_alias (__stpcpy, stpcpy) /* weak_alias is defined elsewhere; NOTE(review): presumably publishes __stpcpy under the name stpcpy -- confirm */
+#endif
diff --git a/gnu/usr.bin/grep/system.h b/gnu/usr.bin/grep/system.h
new file mode 100644
index 0000000..be01791
--- /dev/null
+++ b/gnu/usr.bin/grep/system.h
@@ -0,0 +1,188 @@
+/* Portability cruft. Include after config.h and sys/types.h.
+ Copyright (C) 1996, 1998 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#undef PARAMS
+#if defined (__STDC__) && __STDC__ /* ISO C: ptr_t is void *, PARAMS(x) keeps the parameter list */
+# ifndef _PTR_T /* _PTR_T records that ptr_t has been typedef'd, so it is declared only once */
+# define _PTR_T
+ typedef void * ptr_t;
+# endif
+# define PARAMS(x) x
+#else /* otherwise: ptr_t is char *, PARAMS(x) drops the parameter list to () */
+# ifndef _PTR_T
+# define _PTR_T
+ typedef char * ptr_t;
+# endif
+# define PARAMS(x) ()
+#endif /* __STDC__ */
+
+#ifdef HAVE_UNISTD_H
+# include <fcntl.h>
+# include <unistd.h>
+#else /* no <unistd.h>: supply O_RDONLY and old-style declarations by hand */
+# define O_RDONLY 0
+int open(), read(), close();
+#endif /* HAVE_UNISTD_H */
+
+#include <errno.h>
+#ifndef errno /* errno is not a macro here, so declare it as a plain global */
+extern int errno;
+#endif
+
+#ifndef HAVE_STRERROR /* fallback strerror: range-checks E against sys_nerr; evaluates E more than once; relies on _() defined further down this header */
+extern int sys_nerr;
+extern char *sys_errlist[];
+# define strerror(E) (0 <= (E) && (E) < sys_nerr ? _(sys_errlist[E]) : _("Unknown system error"))
+#endif /* !HAVE_STRERROR */
+
+/* Some operating systems treat text and binary files differently. */
+#if O_BINARY /* nonzero O_BINARY: SET_BINARY(fd) switches fd to binary mode via setmode or _setmode */
+# include <io.h>
+# ifdef HAVE_SETMODE
+# define SET_BINARY(fd) setmode (fd, O_BINARY)
+# else
+# define SET_BINARY(fd) _setmode (fd, O_BINARY)
+# endif
+#else /* O_BINARY is undefined or zero */
+# ifndef O_BINARY /* NOTE(review): if O_BINARY is already defined as 0, this branch is skipped and SET_BINARY stays undefined */
+# define O_BINARY 0
+# define SET_BINARY(fd) (void)0
+# endif
+#endif /* O_BINARY */
+
+#ifdef HAVE_DOS_FILE_NAMES /* '/' and '\\' both count as separators; a leading "X:" counts as a 2-character prefix */
+# define IS_SLASH(c) ((c) == '/' || (c) == '\\')
+# define FILESYSTEM_PREFIX_LEN(f) ((f)[0] && (f)[1] == ':' ? 2 : 0)
+#endif /* HAVE_DOS_FILE_NAMES */
+
+#ifndef IS_SLASH /* default: only '/' is a separator */
+# define IS_SLASH(c) ((c) == '/')
+#endif
+
+#ifndef FILESYSTEM_PREFIX_LEN /* default: file names carry no prefix */
+# define FILESYSTEM_PREFIX_LEN(f) 0
+#endif
+
+/* This assumes _WIN32, like DJGPP, has D_OK. Does it? In what header? */
+#ifdef D_OK /* D_OK available: an EACCES error on F is also treated as "is a directory" when access (f, D_OK) succeeds */
+# ifdef EISDIR /* this variant assigns EISDIR to E in the EACCES case, so E must be a modifiable lvalue */
+# define is_EISDIR(e, f) \
+ ((e) == EISDIR \
+ || ((e) == EACCES && access (f, D_OK) == 0 && ((e) = EISDIR, 1)))
+# else /* no EISDIR to store, so E is left unchanged */
+# define is_EISDIR(e, f) ((e) == EACCES && access (f, D_OK) == 0)
+# endif
+#endif /* D_OK */
+
+#ifndef is_EISDIR /* not defined above (no D_OK): plain comparison, F is ignored */
+# ifdef EISDIR
+# define is_EISDIR(e, f) ((e) == EISDIR)
+# else /* no EISDIR at all: the test is always false */
+# define is_EISDIR(e, f) 0
+# endif
+#endif /* !is_EISDIR */
+
+#if STAT_MACROS_BROKEN /* discard the system's S_ISDIR so it can be rebuilt below from S_IFMT and S_IFDIR */
+# undef S_ISDIR
+#endif
+#if !defined(S_ISDIR) && defined(S_IFDIR) /* if S_IFDIR is missing too, S_ISDIR stays undefined */
+# define S_ISDIR(Mode) (((Mode) & S_IFMT) == S_IFDIR)
+#endif
+
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+#else /* no standard headers: declare the allocator by hand, returning ptr_t */
+ptr_t malloc(), realloc(), calloc();
+void free();
+#endif /* STDC_HEADERS */
+
+#if __STDC__
+# include <stddef.h>
+#endif
+#ifdef STDC_HEADERS
+# include <limits.h>
+#endif
+#ifndef CHAR_BIT /* fallbacks from here on apply when these limits are still undefined at this point */
+# define CHAR_BIT 8
+#endif
+#ifndef INT_MAX /* fallback value is 2^31 - 1, i.e. it assumes a 32-bit int */
+# define INT_MAX 2147483647
+#endif
+#ifndef UCHAR_MAX /* fallback value matches the CHAR_BIT fallback of 8 */
+# define UCHAR_MAX 255
+#endif
+
+#if !defined(STDC_HEADERS) && defined(HAVE_STRING_H) && defined(HAVE_MEMORY_H) /* <memory.h> is pulled in only without the standard headers and only when both HAVE_STRING_H and HAVE_MEMORY_H are set */
+# include <memory.h>
+#endif
+#if defined(STDC_HEADERS) || defined(HAVE_STRING_H)
+# include <string.h>
+#else /* only <strings.h>: map strchr, strrchr and memcpy onto index, rindex and bcopy */
+# include <strings.h>
+# undef strchr
+# define strchr index
+# undef strrchr
+# define strrchr rindex
+# undef memcpy /* the replacement below passes (s, d, n) to bcopy, i.e. source first */
+# define memcpy(d, s, n) bcopy((s), (d), (n))
+#endif
+#ifndef HAVE_MEMCHR /* declaration only; NOTE(review): a replacement memchr is presumably linked in from elsewhere -- confirm */
+ptr_t memchr();
+#endif
+
+#include <ctype.h>
+
+#ifndef isgraph /* no isgraph macro: build one from isprint and isspace */
+# define isgraph(C) (isprint(C) && !isspace(C))
+#endif
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) /* standard headers, or no isascii to test with: accept every value */
+# define IN_CTYPE_DOMAIN(c) 1
+#else /* otherwise each classification below is first guarded by isascii */
+# define IN_CTYPE_DOMAIN(c) isascii(c)
+#endif /* the IS* wrappers below may evaluate C more than once */
+
+#define ISALPHA(C) (IN_CTYPE_DOMAIN (C) && isalpha (C))
+#define ISUPPER(C) (IN_CTYPE_DOMAIN (C) && isupper (C))
+#define ISLOWER(C) (IN_CTYPE_DOMAIN (C) && islower (C))
+#define ISDIGIT(C) (IN_CTYPE_DOMAIN (C) && isdigit (C))
+#define ISXDIGIT(C) (IN_CTYPE_DOMAIN (C) && isxdigit (C))
+#define ISSPACE(C) (IN_CTYPE_DOMAIN (C) && isspace (C))
+#define ISPUNCT(C) (IN_CTYPE_DOMAIN (C) && ispunct (C))
+#define ISALNUM(C) (IN_CTYPE_DOMAIN (C) && isalnum (C))
+#define ISPRINT(C) (IN_CTYPE_DOMAIN (C) && isprint (C))
+#define ISGRAPH(C) (IN_CTYPE_DOMAIN (C) && isgraph (C))
+#define ISCNTRL(C) (IN_CTYPE_DOMAIN (C) && iscntrl (C))
+
+#define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C)) /* converts only when ISUPPER(C) holds; C is evaluated more than once */
+
+#if ENABLE_NLS /* NLS enabled: _() passes the string through gettext */
+# include <libintl.h>
+# define _(String) gettext (String)
+#else /* NLS disabled: _() leaves the string untouched */
+# define _(String) String
+#endif /* ENABLE_NLS */
+#define N_(String) String /* defined the same way in both configurations: never calls gettext */
+
+#if HAVE_SETLOCALE
+# include <locale.h>
+#endif
+
+#ifndef initialize_main /* default when nothing earlier defined it: the call expands to nothing */
+#define initialize_main(argcp, argvp)
+#endif
OpenPOWER on IntegriCloud