Merge revision 1.2 (unspam l10n ranges check) into Gawk 3.1.0.

author: obrien <obrien@FreeBSD.org> 2001-11-02 21:34:12 +0000
committer: obrien <obrien@FreeBSD.org> 2001-11-02 21:34:12 +0000
commit: dc6f25b547f30f5e80e003f51948161c0cbe77d9 (patch)
tree: c71ed68868bfd0faea4346c572cafb502875beee /contrib/awk
parent: f99bc3baaa551ba49e40c2deb3b237bf89c299f0 (diff)
download: FreeBSD-src-dc6f25b547f30f5e80e003f51948161c0cbe77d9.zip
FreeBSD-src-dc6f25b547f30f5e80e003f51948161c0cbe77d9.tar.gz
1 files changed, 271 insertions, 287 deletions
diff --git a/contrib/awk/dfa.c b/contrib/awk/dfa.c
index c806bea..51d0efd 100644
--- a/contrib/awk/dfa.c
+++ b/contrib/awk/dfa.c
@@ -1,5 +1,5 @@
 /* dfa.c - deterministic extended regexp routines for GNU
-   Copyright (C) 1988 Free Software Foundation, Inc.
+   Copyright 1988, 1998, 2000 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -18,6 +18,9 @@
 /* Written June, 1988 by Mike Haertel
    Modified July, 1988 by Arthur David Olson to assist BMG speedups  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -26,10 +29,14 @@
 #include <ctype.h>
 #include <stdio.h>
 
+#ifndef VMS
+#include <sys/types.h>
+#else
+#include <stddef.h>
+#endif
 #ifdef STDC_HEADERS
 #include <stdlib.h>
 #else
-#include <sys/types.h>
 extern char *calloc(), *malloc(), *realloc();
 extern void free();
 #endif
@@ -77,6 +84,29 @@ extern void free();
 #define ISCNTRL(C) (isascii(C) && iscntrl(C))
 #endif
 
+/* ISASCIIDIGIT differs from ISDIGIT, as follows:
+   - Its arg may be any int or unsigned int; it need not be an unsigned char.
+   - It's guaranteed to evaluate its argument exactly once.
+   - It's typically faster.
+   Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
+   only '0' through '9' are digits.  Prefer ISASCIIDIGIT to ISDIGIT unless
+   it's important to use the locale's definition of `digit' even when the
+   host does not conform to Posix.  */
+#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
+
+/* If we (don't) have I18N.  */
+/* glibc defines _ */
+#ifndef _
+# ifdef HAVE_LIBINTL_H
+#  include <libintl.h>
+#  ifndef _
+#   define _(Str) gettext (Str)
+#  endif
+# else
+#  define _(Str) (Str)
+# endif
+#endif
+
 #ifndef __FreeBSD__
 #include "regex.h"
 #else
@@ -84,57 +114,56 @@ extern void free();
 #endif
 #include "dfa.h"
 
-#ifdef __STDC__
-typedef void *ptr_t;
-#else
-typedef char *ptr_t;
-#ifndef const
-#define const
+/* HPUX, define those as macros in sys/param.h */
+#ifdef setbit
+# undef setbit
 #endif
+#ifdef clrbit
+# undef clrbit
 #endif
 
-static void dfamust _RE_ARGS((struct dfa *dfa));
+static void dfamust PARAMS ((struct dfa *dfa));
 
-static ptr_t xcalloc _RE_ARGS((size_t n, size_t s));
-static ptr_t xmalloc _RE_ARGS((size_t n));
-static ptr_t xrealloc _RE_ARGS((ptr_t p, size_t n));
+static ptr_t xcalloc PARAMS ((size_t n, size_t s));
+static ptr_t xmalloc PARAMS ((size_t n));
+static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
 #ifdef DEBUG
-static void prtok _RE_ARGS((token t));
+static void prtok PARAMS ((token t));
 #endif
-static int tstbit _RE_ARGS((int b, charclass c));
-static void setbit _RE_ARGS((int b, charclass c));
-static void clrbit _RE_ARGS((int b, charclass c));
-static void copyset _RE_ARGS((charclass src, charclass dst));
-static void zeroset _RE_ARGS((charclass s));
-static void notset _RE_ARGS((charclass s));
-static int equal _RE_ARGS((charclass s1, charclass s2));
-static int charclass_index _RE_ARGS((charclass s));
-static int looking_at _RE_ARGS((const char *s));
-static token lex _RE_ARGS((void));
-static void addtok _RE_ARGS((token t));
-static void atom _RE_ARGS((void));
-static int nsubtoks _RE_ARGS((int tindex));
-static void copytoks _RE_ARGS((int tindex, int ntokens));
-static void closure _RE_ARGS((void));
-static void branch _RE_ARGS((void));
-static void regexp _RE_ARGS((int toplevel));
-static void copy _RE_ARGS((position_set *src, position_set *dst));
-static void insert _RE_ARGS((position p, position_set *s));
-static void merge _RE_ARGS((position_set *s1, position_set *s2, position_set *m));
-static void delete _RE_ARGS((position p, position_set *s));
-static int state_index _RE_ARGS((struct dfa *d, position_set *s,
+static int tstbit PARAMS ((int b, charclass c));
+static void setbit PARAMS ((int b, charclass c));
+static void clrbit PARAMS ((int b, charclass c));
+static void copyset PARAMS ((charclass src, charclass dst));
+static void zeroset PARAMS ((charclass s));
+static void notset PARAMS ((charclass s));
+static int equal PARAMS ((charclass s1, charclass s2));
+static int charclass_index PARAMS ((charclass s));
+static int looking_at PARAMS ((const char *s));
+static token lex PARAMS ((void));
+static void addtok PARAMS ((token t));
+static void atom PARAMS ((void));
+static int nsubtoks PARAMS ((int tindex));
+static void copytoks PARAMS ((int tindex, int ntokens));
+static void closure PARAMS ((void));
+static void branch PARAMS ((void));
+static void regexp PARAMS ((int toplevel));
+static void copy PARAMS ((position_set *src, position_set *dst));
+static void insert PARAMS ((position p, position_set *s));
+static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
+static void delete PARAMS ((position p, position_set *s));
+static int state_index PARAMS ((struct dfa *d, position_set *s,
 			  int newline, int letter));
-static void build_state _RE_ARGS((int s, struct dfa *d));
-static void build_state_zero _RE_ARGS((struct dfa *d));
-static char *icatalloc _RE_ARGS((char *old, char *new));
-static char *icpyalloc _RE_ARGS((char *string));
-static char *istrstr _RE_ARGS((char *lookin, char *lookfor));
-static void ifree _RE_ARGS((char *cp));
-static void freelist _RE_ARGS((char **cpp));
-static char **enlist _RE_ARGS((char **cpp, char *new, size_t len));
-static char **comsubs _RE_ARGS((char *left, char *right));
-static char **addlists _RE_ARGS((char **old, char **new));
-static char **inboth _RE_ARGS((char **left, char **right));
+static void build_state PARAMS ((int s, struct dfa *d));
+static void build_state_zero PARAMS ((struct dfa *d));
+static char *icatalloc PARAMS ((char *old, char *new));
+static char *icpyalloc PARAMS ((char *string));
+static char *istrstr PARAMS ((char *lookin, char *lookfor));
+static void ifree PARAMS ((char *cp));
+static void freelist PARAMS ((char **cpp));
+static char **enlist PARAMS ((char **cpp, char *new, size_t len));
+static char **comsubs PARAMS ((char *left, char *right));
+static char **addlists PARAMS ((char **old, char **new));
+static char **inboth PARAMS ((char **left, char **right));
 
 #ifdef __FreeBSD__
 static int collate_range_cmp (a, b)
@@ -154,39 +183,34 @@ static int collate_range_cmp (a, b)
 #endif
 
 static ptr_t
-xcalloc(n, s)
-     size_t n;
-     size_t s;
+xcalloc (size_t n, size_t s)
 {
   ptr_t r = calloc(n, s);
 
   if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
   return r;
 }
 
 static ptr_t
-xmalloc(n)
-     size_t n;
+xmalloc (size_t n)
 {
   ptr_t r = malloc(n);
 
   assert(n != 0);
   if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
   return r;
 }
 
 static ptr_t
-xrealloc(p, n)
-     ptr_t p;
-     size_t n;
+xrealloc (ptr_t p, size_t n)
 {
   ptr_t r = realloc(p, n);
 
   assert(n != 0);
   if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
   return r;
 }
 
@@ -206,8 +230,7 @@ xrealloc(p, n)
 #ifdef DEBUG
 
 static void
-prtok(t)
-     token t;
+prtok (token t)
 {
   char *s;
 
@@ -245,33 +268,25 @@ prtok(t)
 /* Stuff pertaining to charclasses. */
 
 static int
-tstbit(b, c)
-     int b;
-     charclass c;
+tstbit (int b, charclass c)
 {
   return c[b / INTBITS] & 1 << b % INTBITS;
 }
 
 static void
-setbit(b, c)
-     int b;
-     charclass c;
+setbit (int b, charclass c)
 {
   c[b / INTBITS] |= 1 << b % INTBITS;
 }
 
 static void
-clrbit(b, c)
-     int b;
-     charclass c;
+clrbit (int b, charclass c)
 {
   c[b / INTBITS] &= ~(1 << b % INTBITS);
 }
 
 static void
-copyset(src, dst)
-     charclass src;
-     charclass dst;
+copyset (charclass src, charclass dst)
 {
   int i;
 
@@ -280,8 +295,7 @@ copyset(src, dst)
 }
 
 static void
-zeroset(s)
-     charclass s;
+zeroset (charclass s)
 {
   int i;
 
@@ -290,8 +304,7 @@ zeroset(s)
 }
 
 static void
-notset(s)
-     charclass s;
+notset (charclass s)
 {
   int i;
 
@@ -300,9 +313,7 @@ notset(s)
 }
 
 static int
-equal(s1, s2)
-     charclass s1;
-     charclass s2;
+equal (charclass s1, charclass s2)
 {
   int i;
 
@@ -317,8 +328,7 @@ static struct dfa *dfa;
 
 /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
 static int
-charclass_index(s)
-     charclass s;
+charclass_index (charclass s)
 {
   int i;
 
@@ -337,15 +347,17 @@ static reg_syntax_t syntax_bits, syntax_bits_set;
 /* Flag for case-folding letters into sets. */
 static int case_fold;
 
+/* End-of-line byte in data.  */
+static unsigned char eolbyte;
+
 /* Entry point to set syntax options. */
 void
-dfasyntax(bits, fold)
-     reg_syntax_t bits;
-     int fold;
+dfasyntax (reg_syntax_t bits, int fold, int eol)
 {
   syntax_bits_set = 1;
   syntax_bits = bits;
   case_fold = fold;
+  eolbyte = eol;
 }
 
 /* Lexical analyzer.  All the dross that deals with the obnoxious
@@ -353,7 +365,6 @@ dfasyntax(bits, fold)
    reader is referred to the GNU Regex documentation for the
    meaning of the @#%!@#%^!@ syntax bits. */
 
-static char *lexstart;		/* Pointer to beginning of input string. */
 static char *lexptr;		/* Pointer to next input character. */
 static int lexleft;		/* Number of characters remaining. */
 static token lasttok;		/* Previous token returned; initially END. */
@@ -366,10 +377,12 @@ static int minrep, maxrep;	/* Repeat counts for {m,n}. */
 #define FETCH(c, eoferr)   	      \
   {			   	      \
     if (! lexleft)	   	      \
-      if (eoferr != 0)	   	      \
-	dfaerror(eoferr);  	      \
-      else		   	      \
-	return lasttok = END;	      \
+      {				      \
+	if (eoferr != 0)	      \
+	  dfaerror (eoferr);	      \
+	else		   	      \
+	  return lasttok = END;	      \
+      }				      \
     (c) = (unsigned char) *lexptr++;  \
     --lexleft;		   	      \
   }
@@ -392,8 +405,8 @@ FUNC(is_print, ISPRINT)
 FUNC(is_graph, ISGRAPH)
 FUNC(is_cntrl, ISCNTRL)
 
-static int is_blank(c)
-int c;
+static int
+is_blank (int c)
 {
    return (c == ' ' || c == '\t');
 }
@@ -403,7 +416,7 @@ int c;
    the class.  The leading [ has already been eaten by the lexical analyzer. */
 static struct {
   const char *name;
-  int (*pred) _RE_ARGS((int));
+  int (*pred) PARAMS ((int));
 } prednames[] = {
   { ":alpha:]", is_alpha },
   { ":upper:]", is_upper },
@@ -420,9 +433,11 @@ static struct {
   { 0 }
 };
 
+/* Return non-zero if C is a `word-constituent' byte; zero otherwise.  */
+#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
+
 static int
-looking_at(s)
-     const char *s;
+looking_at (char const *s)
 {
   size_t len;
 
@@ -433,12 +448,14 @@ looking_at(s)
 }
 
 static token
-lex()
+lex (void)
 {
   token c, c1, c2;
   int backslash = 0, invert;
   charclass ccl;
   int i;
+  char lo[2];
+  char hi[2];
 
   /* Basic plan: We fetch a character.  If it's a backslash,
      we set the backslash flag and go through the loop again.
@@ -455,7 +472,7 @@ lex()
 	  if (backslash)
 	    goto normal_char;
 	  if (lexleft == 0)
-	    dfaerror("Unfinished \\ escape");
+	    dfaerror(_("Unfinished \\ escape"));
 	  backslash = 1;
 	  break;
 
@@ -561,44 +578,76 @@ lex()
 	    goto normal_char;
 	  if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
 	    goto normal_char;
-	  minrep = maxrep = 0;
+	  if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+	    goto normal_char;
+
+	  if (syntax_bits & RE_NO_BK_BRACES)
+	    {
+	      /* Scan ahead for a valid interval; if it's not valid,
+		 treat it as a literal '{'.  */
+	      int lo = -1, hi = -1;
+	      char const *p = lexptr;
+	      char const *lim = p + lexleft;
+	      for (;  p != lim && ISASCIIDIGIT (*p);  p++)
+		lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
+	      if (p != lim && *p == ',')
+		while (++p != lim && ISASCIIDIGIT (*p))
+		  hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
+	      else
+		hi = lo;
+	      if (p == lim || *p != '}'
+		  || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
+		goto normal_char;
+	    }
+
+	  minrep = 0;
 	  /* Cases:
 	     {M} - exact count
 	     {M,} - minimum count, maximum is infinity
-	     {,M} - 0 through M
 	     {M,N} - M through N */
-	  FETCH(c, "unfinished repeat count");
-	  if (ISDIGIT(c))
+	  FETCH(c, _("unfinished repeat count"));
+	  if (ISASCIIDIGIT (c))
 	    {
 	      minrep = c - '0';
 	      for (;;)
 		{
-		  FETCH(c, "unfinished repeat count");
-		  if (!ISDIGIT(c))
+		  FETCH(c, _("unfinished repeat count"));
+		  if (! ISASCIIDIGIT (c))
 		    break;
 		  minrep = 10 * minrep + c - '0';
 		}
 	    }
-	  else if (c != ',')
-	    dfaerror("malformed repeat count");
+	  else
+	    dfaerror(_("malformed repeat count"));
 	  if (c == ',')
-	    for (;;)
-	      {
-		FETCH(c, "unfinished repeat count");
-		if (!ISDIGIT(c))
-		  break;
-		maxrep = 10 * maxrep + c - '0';
-	      }
+	    {
+	      FETCH (c, _("unfinished repeat count"));
+	      if (! ISASCIIDIGIT (c))
+		maxrep = -1;
+	      else
+		{
+		  maxrep = c - '0';
+		  for (;;)
+		    {
+		      FETCH (c, _("unfinished repeat count"));
+		      if (! ISASCIIDIGIT (c))
+			break;
+		      maxrep = 10 * maxrep + c - '0';
+		    }
+		  if (0 <= maxrep && maxrep < minrep)
+		    dfaerror (_("malformed repeat count"));
+		}
+	    }
 	  else
 	    maxrep = minrep;
 	  if (!(syntax_bits & RE_NO_BK_BRACES))
 	    {
 	      if (c != '\\')
-		dfaerror("malformed repeat count");
-	      FETCH(c, "unfinished repeat count");
+		dfaerror(_("malformed repeat count"));
+	      FETCH(c, _("unfinished repeat count"));
 	    }
 	  if (c != '}')
-	    dfaerror("malformed repeat count");
+	    dfaerror(_("malformed repeat count"));
 	  laststart = 0;
 	  return lasttok = REPMN;
 
@@ -640,7 +689,7 @@ lex()
 	  zeroset(ccl);
 	  notset(ccl);
 	  if (!(syntax_bits & RE_DOT_NEWLINE))
-	    clrbit('\n', ccl);
+	    clrbit(eolbyte, ccl);
 	  if (syntax_bits & RE_DOT_NOT_NULL)
 	    clrbit('\0', ccl);
 	  laststart = 0;
@@ -652,22 +701,21 @@ lex()
 	    goto normal_char;
 	  zeroset(ccl);
 	  for (c2 = 0; c2 < NOTCHAR; ++c2)
-	    if (ISALNUM(c2))
+	    if (IS_WORD_CONSTITUENT(c2))
 	      setbit(c2, ccl);
-	  setbit('_', ccl);
 	  if (c == 'W')
 	    notset(ccl);
 	  laststart = 0;
 	  return lasttok = CSET + charclass_index(ccl);
-	
+
 	case '[':
 	  if (backslash)
 	    goto normal_char;
 	  zeroset(ccl);
-	  FETCH(c, "Unbalanced [");
+	  FETCH(c, _("Unbalanced ["));
 	  if (c == '^')
 	    {
-	      FETCH(c, "Unbalanced [");
+	      FETCH(c, _("Unbalanced ["));
 	      invert = 1;
 	    }
 	  else
@@ -694,15 +742,15 @@ lex()
 			  setbit(c2, ccl);
 		      lexptr += strlen(prednames[c1].name);
 		      lexleft -= strlen(prednames[c1].name);
-		      FETCH(c1, "Unbalanced [");
+		      FETCH(c1, _("Unbalanced ["));
 		      goto skip;
 		    }
 	      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-		FETCH(c, "Unbalanced [");
-	      FETCH(c1, "Unbalanced [");
+		FETCH(c, _("Unbalanced ["));
+	      FETCH(c1, _("Unbalanced ["));
 	      if (c1 == '-')
 		{
-		  FETCH(c2, "Unbalanced [");
+		  FETCH(c2, _("Unbalanced ["));
 		  if (c2 == ']')
 		    {
 		      /* In the case [x-], the - is an ordinary hyphen,
@@ -715,8 +763,8 @@ lex()
 		    {
 		      if (c2 == '\\'
 			  && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-			FETCH(c2, "Unbalanced [");
-		      FETCH(c1, "Unbalanced [");
+			FETCH(c2, _("Unbalanced ["));
+		      FETCH(c1, _("Unbalanced ["));
 		    }
 		}
 	      else
@@ -742,17 +790,26 @@ lex()
 		  }
 	      }
 #else
-	      while (c <= c2)
+	      lo[0] = c;  lo[1] = '\0';
+	      hi[0] = c2; hi[1] = '\0';
+	      for (c = 0; c < NOTCHAR; c++)
 		{
-		  setbit(c, ccl);
-		  if (case_fold)
-		    if (ISUPPER(c))
-		      setbit(tolower(c), ccl);
-		    else if (ISLOWER(c))
-		      setbit(toupper(c), ccl);
-		  ++c;
+		  char ch[2];
+		  ch[0] = c;  ch[1] = '\0';
+		  if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
+		    {
+		      setbit (c, ccl);
+		      if (case_fold)
+			{
+			  if (ISUPPER (c))
+			    setbit (tolower (c), ccl);
+			  else if (ISLOWER (c))
+			    setbit (toupper (c), ccl);
+			}
+		    }
 		}
 #endif
+
 	    skip:
 	      ;
 	    }
@@ -761,7 +818,7 @@ lex()
 	    {
 	      notset(ccl);
 	      if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
-		clrbit('\n', ccl);
+		clrbit(eolbyte, ccl);
 	    }
 	  laststart = 0;
 	  return lasttok = CSET + charclass_index(ccl);
@@ -801,8 +858,7 @@ static int depth;		/* Current depth of a hypothetical stack
 /* Add the given token to the parse tree, maintaining the depth count and
    updating the maximum depth if necessary. */
 static void
-addtok(t)
-     token t;
+addtok (token t)
 {
   REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
   dfa->tokens[dfa->tindex++] = t;
@@ -861,7 +917,7 @@ addtok(t)
    The parser builds a parse tree in postfix form in an array of tokens. */
 
 static void
-atom()
+atom (void)
 {
   if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
       || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@@ -875,7 +931,7 @@ atom()
       tok = lex();
       regexp(0);
       if (tok != RPAREN)
-	dfaerror("Unbalanced (");
+	dfaerror(_("Unbalanced ("));
       tok = lex();
     }
   else
@@ -884,8 +940,7 @@ atom()
 
 /* Return the number of tokens in the given subexpression. */
 static int
-nsubtoks(tindex)
-int tindex;
+nsubtoks (int tindex)
 {
   int ntoks1;
 
@@ -907,8 +962,7 @@ int tindex;
 
 /* Copy the given subexpression to the top of the tree. */
 static void
-copytoks(tindex, ntokens)
-     int tindex, ntokens;
+copytoks (int tindex, int ntokens)
 {
   int i;
 
@@ -917,7 +971,7 @@ copytoks(tindex, ntokens)
 }
 
 static void
-closure()
+closure (void)
 {
   int tindex, ntokens, i;
 
@@ -927,7 +981,7 @@ closure()
       {
 	ntokens = nsubtoks(dfa->tindex);
 	tindex = dfa->tindex - ntokens;
-	if (maxrep == 0)
+	if (maxrep < 0)
 	  addtok(PLUS);
 	if (minrep == 0)
 	  addtok(QMARK);
@@ -952,7 +1006,7 @@ closure()
 }
 
 static void
-branch()
+branch (void)
 {
   closure();
   while (tok != RPAREN && tok != OR && tok >= 0)
@@ -963,8 +1017,7 @@ branch()
 }
 
 static void
-regexp(toplevel)
-     int toplevel;
+regexp (int toplevel)
 {
   branch();
   while (tok == OR)
@@ -982,21 +1035,17 @@ regexp(toplevel)
    length of the string, so s can include NUL characters.  D is a pointer to
    the struct dfa to parse into. */
 void
-dfaparse(s, len, d)
-     char *s;
-     size_t len;
-     struct dfa *d;
-
+dfaparse (char *s, size_t len, struct dfa *d)
 {
   dfa = d;
-  lexstart = lexptr = s;
+  lexptr = s;
   lexleft = len;
   lasttok = END;
   laststart = 1;
   parens = 0;
 
   if (! syntax_bits_set)
-    dfaerror("No syntax specified");
+    dfaerror(_("No regexp syntax bits specified"));
 
   tok = lex();
   depth = d->depth;
@@ -1004,7 +1053,7 @@ dfaparse(s, len, d)
   regexp(1);
 
   if (tok != END)
-    dfaerror("Unbalanced )");
+    dfaerror(_("Unbalanced )"));
 
   addtok(END - d->nregexps);
   addtok(CAT);
@@ -1019,9 +1068,7 @@ dfaparse(s, len, d)
 
 /* Copy one set to another; the destination must be large enough. */
 static void
-copy(src, dst)
-     position_set *src;
-     position_set *dst;
+copy (position_set *src, position_set *dst)
 {
   int i;
 
@@ -1035,9 +1082,7 @@ copy(src, dst)
    the same index then their constraints are logically or'd together.
    S->elems must point to an array large enough to hold the resulting set. */
 static void
-insert(p, s)
-     position p;
-     position_set *s;
+insert (position p, position_set *s)
 {
   int i;
   position t1, t2;
@@ -1062,10 +1107,7 @@ insert(p, s)
 /* Merge two sets of positions into a third.  The result is exactly as if
    the positions of both sets were inserted into an initially empty set. */
 static void
-merge(s1, s2, m)
-     position_set *s1;
-     position_set *s2;
-     position_set *m;
+merge (position_set *s1, position_set *s2, position_set *m)
 {
   int i = 0, j = 0;
 
@@ -1088,9 +1130,7 @@ merge(s1, s2, m)
 
 /* Delete a position from a set. */
 static void
-delete(p, s)
-     position p;
-     position_set *s;
+delete (position p, position_set *s)
 {
   int i;
 
@@ -1107,11 +1147,7 @@ delete(p, s)
    state.  Newline and letter tell whether we got here on a newline or
    letter, respectively. */
 static int
-state_index(d, s, newline, letter)
-     struct dfa *d;
-     position_set *s;
-     int newline;
-     int letter;
+state_index (struct dfa *d, position_set *s, int newline, int letter)
 {
   int hash = 0;
   int constraint;
@@ -1176,12 +1212,8 @@ state_index(d, s, newline, letter)
    that position with the elements of its follow labeled with an appropriate
    constraint.  Repeat exhaustively until no funny positions are left.
    S->elems must be large enough to hold the result. */
-static void epsclosure _RE_ARGS((position_set *s, struct dfa *d));
-
 static void
-epsclosure(s, d)
-     position_set *s;
-     struct dfa *d;
+epsclosure (position_set *s, struct dfa *d)
 {
   int i, j;
   int *visited;
@@ -1293,9 +1325,7 @@ epsclosure(s, d)
    scheme; the number of elements in each set deeper in the stack can be
    used to determine the address of a particular set's array. */
 void
-dfaanalyze(d, searchflag)
-     struct dfa *d;
-     int searchflag;
+dfaanalyze (struct dfa *d, int searchflag)
 {
   int *nullable;		/* Nullable stack. */
   int *nfirstpos;		/* Element count stack for firstpos sets. */
@@ -1556,10 +1586,7 @@ dfaanalyze(d, searchflag)
    create a new group labeled with the characters of C and insert this
    position in that group. */
 void
-dfastate(s, d, trans)
-     int s;
-     struct dfa *d;
-     int trans[];
+dfastate (int s, struct dfa *d, int trans[])
 {
   position_set grps[NOTCHAR];	/* As many as will ever be needed. */
   charclass labels[NOTCHAR];	/* Labels corresponding to the groups. */
@@ -1588,9 +1615,9 @@ dfastate(s, d, trans)
     {
       initialized = 1;
       for (i = 0; i < NOTCHAR; ++i)
-	if (ISALNUM(i))
+	if (IS_WORD_CONSTITUENT(i))
 	  setbit(i, letters);
-      setbit('\n', newline);
+      setbit(eolbyte, newline);
     }
 
   zeroset(matches);
@@ -1611,7 +1638,7 @@ dfastate(s, d, trans)
 	{
 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
 					 d->states[s].newline, 1))
-	    clrbit('\n', matches);
+	    clrbit(eolbyte, matches);
 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
 					 d->states[s].newline, 0))
 	    for (j = 0; j < CHARCLASS_INTS; ++j)
@@ -1721,12 +1748,8 @@ dfastate(s, d, trans)
       else
 	state_letter = state;
       for (i = 0; i < NOTCHAR; ++i)
-	if (i == '\n')
-	  trans[i] = state_newline;
-	else if (ISALNUM(i))
-	  trans[i] = state_letter;
-	else
-	  trans[i] = state;
+	trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
+      trans[eolbyte] = state_newline;
     }
   else
     for (i = 0; i < NOTCHAR; ++i)
@@ -1750,7 +1773,7 @@ dfastate(s, d, trans)
 
       /* Find out if the new state will want any context information. */
       wants_newline = 0;
-      if (tstbit('\n', labels[i]))
+      if (tstbit(eolbyte, labels[i]))
 	for (j = 0; j < follows.nelem; ++j)
 	  if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
 	    wants_newline = 1;
@@ -1782,9 +1805,9 @@ dfastate(s, d, trans)
 	    {
 	      int c = j * INTBITS + k;
 
-	      if (c == '\n')
+	      if (c == eolbyte)
 		trans[c] = state_newline;
-	      else if (ISALNUM(c))
+	      else if (IS_WORD_CONSTITUENT(c))
 		trans[c] = state_letter;
 	      else if (c < NOTCHAR)
 		trans[c] = state;
@@ -1805,9 +1828,7 @@ dfastate(s, d, trans)
    TODO: Improve this comment, get rid of the unnecessary redundancy. */
 
 static void
-build_state(s, d)
-     int s;
-     struct dfa *d;
+build_state (int s, struct dfa *d)
 {
   int *trans;			/* The new transition table. */
   int i;
@@ -1873,8 +1894,8 @@ build_state(s, d)
 
   /* Keep the newline transition in a special place so we can use it as
      a sentinel. */
-  d->newlines[s] = trans['\n'];
-  trans['\n'] = -1;
+  d->newlines[s] = trans[eolbyte];
+  trans[eolbyte] = -1;
 
   if (ACCEPTING(s, *d))
     d->fails[s] = trans;
@@ -1883,8 +1904,7 @@ build_state(s, d)
 }
 
 static void
-build_state_zero(d)
-     struct dfa *d;
+build_state_zero (struct dfa *d)
 {
   d->tralloc = 1;
   d->trcount = 0;
@@ -1910,18 +1930,14 @@ build_state_zero(d)
    match needs to be verified by a backtracking matcher.  Otherwise
    we store a 0 in *backref. */
 char *
-dfaexec(d, begin, end, newline, count, backref)
-     struct dfa *d;
-     char *begin;
-     char *end;
-     int newline;
-     int *count;
-     int *backref;
+dfaexec (struct dfa *d, char *begin, char *end,
+	 int newline, int *count, int *backref)
 {
   register int s, s1, tmp;	/* Current state. */
   register unsigned char *p;	/* Current input character. */
   register int **trans, *t;	/* Copy of d->trans so it can be optimized
 				   into a register. */
+  register unsigned char eol = eolbyte;	/* Likewise for eolbyte.  */
   static int sbit[NOTCHAR];	/* Table for anding with d->success. */
   static int sbit_init;
 
@@ -1931,12 +1947,8 @@ dfaexec(d, begin, end, newline, count, backref)
 
       sbit_init = 1;
       for (i = 0; i < NOTCHAR; ++i)
-	if (i == '\n')
-	  sbit[i] = 4;
-	else if (ISALNUM(i))
-	  sbit[i] = 2;
-	else
-	  sbit[i] = 1;
+	sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
+      sbit[eol] = 4;
     }
 
   if (! d->tralloc)
@@ -1945,34 +1957,25 @@ dfaexec(d, begin, end, newline, count, backref)
   s = s1 = 0;
   p = (unsigned char *) begin;
   trans = d->trans;
-  *end = '\n';
+  *end = eol;
 
   for (;;)
     {
-      /* The dreaded inner loop. */
-      if ((t = trans[s]) != 0)
-	do
-	  {
-	    s1 = t[*p++];
-	    if (! (t = trans[s1]))
-	      goto last_was_s;
-	    s = t[*p++];
-	  }
-        while ((t = trans[s]) != 0);
-      goto last_was_s1;
-    last_was_s:
-      tmp = s, s = s1, s1 = tmp;
-    last_was_s1:
+      while ((t = trans[s]) != 0) { /* hand-optimized loop */
+	s1 = t[*p++];
+        if ((t = trans[s1]) == 0) {
+           tmp = s ; s = s1 ; s1 = tmp ; /* swap */
+           break;
+        }
+	s = t[*p++];
+      }
 
       if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
 	{
 	  if (d->success[s] & sbit[*p])
 	    {
 	      if (backref)
-		if (d->states[s].backref)
-		  *backref = 1;
-		else
-		  *backref = 0;
+		*backref = (d->states[s].backref != 0);
 	      return (char *) p;
 	    }
 
@@ -1982,7 +1985,7 @@ dfaexec(d, begin, end, newline, count, backref)
 	}
 
       /* If the previous character was a newline, count it. */
-      if (count && (char *) p <= end && p[-1] == '\n')
+      if (count && (char *) p <= end && p[-1] == eol)
 	++*count;
 
       /* Check if we've run off the end of the buffer. */
@@ -1996,7 +1999,7 @@ dfaexec(d, begin, end, newline, count, backref)
 	  continue;
 	}
 
-      if (p[-1] == '\n' && newline)
+      if (p[-1] == eol && newline)
 	{
 	  s = d->newlines[s1];
 	  continue;
@@ -2009,8 +2012,7 @@ dfaexec(d, begin, end, newline, count, backref)
 /* Initialize the components of a dfa that the other routines don't
    initialize for themselves. */
 void
-dfainit(d)
-     struct dfa *d;
+dfainit (struct dfa *d)
 {
   d->calloc = 1;
   MALLOC(d->charclasses, charclass, d->calloc);
@@ -2024,15 +2026,16 @@ dfainit(d)
   d->tralloc = 0;
 
   d->musts = 0;
+  d->realtrans = 0;
+  d->fails = 0;
+  d->newlines = 0;
+  d->success = 0;
+
 }
 
 /* Parse and analyze a single string of the given length. */
 void
-dfacomp(s, len, d, searchflag)
-     char *s;
-     size_t len;
-     struct dfa *d;
-     int searchflag;
+dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
 {
   if (case_fold)	/* dummy folding in service of dfamust() */
     {
@@ -2041,13 +2044,13 @@ dfacomp(s, len, d, searchflag)
 
       lcopy = malloc(len);
       if (!lcopy)
-	dfaerror("out of memory");
-      
+	dfaerror(_("out of memory"));
+
       /* This is a kludge. */
       case_fold = 0;
       for (i = 0; i < len; ++i)
-	if (ISUPPER(s[i]))
-	  lcopy[i] = tolower(s[i]);
+	if (ISUPPER ((unsigned char) s[i]))
+	  lcopy[i] = tolower ((unsigned char) s[i]);
 	else
 	  lcopy[i] = s[i];
 
@@ -2071,8 +2074,7 @@ dfacomp(s, len, d, searchflag)
 
 /* Free the storage held by the components of a dfa. */
 void
-dfafree(d)
-     struct dfa *d;
+dfafree (struct dfa *d)
 {
   int i;
   struct dfamust *dm, *ndm;
@@ -2133,9 +2135,9 @@ dfafree(d)
 	Type	left		right		is		in
 	----	----		-----		--		--
 	char c	# c		# c		# c		# c
-	
+
 	CSET	ZERO		ZERO		ZERO		ZERO
-	
+
 	STAR	ZERO		ZERO		ZERO		ZERO
 
 	QMARK	ZERO		ZERO		ZERO		ZERO
@@ -2146,12 +2148,12 @@ dfafree(d)
 		p->left :	q->right :	q->is!=ZERO) ?	q->in plus
 		p->is##q->left	p->right##q->is	p->is##q->is :	p->right##q->left
 						ZERO
-					
+
 	OR	longest common	longest common	(do p->is and	substrings common to
 		leading		trailing	q->is have same	p->in and q->in
-		(sub)sequence	(sub)sequence	length and	
-		of p->left	of p->right	content) ?	
-		and q->left	and q->right	p->is : NULL	
+		(sub)sequence	(sub)sequence	length and
+		of p->left	of p->right	content) ?
+		and q->left	and q->right	p->is : NULL
 
    If there's anything else we recognize in the tree, all four sequences get set
    to zero-length sequences.  If there's something we don't recognize in the tree,
@@ -2178,15 +2180,13 @@ dfafree(d)
    Does optimization actually accomplish anything,
    or is the automaton you get from "psi|epsilon" (for example)
    the same as the one you get from "psi" (for example)?
-  
+
    Are optimizable r.e.'s likely to be used in real-life situations
    (something like 'ab*' is probably unlikely; something like is
    'psi|epsilon' is likelier)? */
 
 static char *
-icatalloc(old, new)
-     char *old;
-     char *new;
+icatalloc (char *old, char *new)
 {
   char *result;
   size_t oldsize, newsize;
@@ -2207,16 +2207,13 @@ icatalloc(old, new)
 }
 
 static char *
-icpyalloc(string)
-     char *string;
+icpyalloc (char *string)
 {
   return icatalloc((char *) NULL, string);
 }
 
 static char *
-istrstr(lookin, lookfor)
-     char *lookin;
-     char *lookfor;
+istrstr (char *lookin, char *lookfor)
 {
   char *cp;
   size_t len;
@@ -2229,16 +2226,14 @@ istrstr(lookin, lookfor)
 }
 
 static void
-ifree(cp)
-     char *cp;
+ifree (char *cp)
 {
   if (cp != NULL)
     free(cp);
 }
 
 static void
-freelist(cpp)
-     char **cpp;
+freelist (char **cpp)
 {
   int i;
 
@@ -2252,10 +2247,7 @@ freelist(cpp)
 }
 
 static char **
-enlist(cpp, new, len)
-     char **cpp;
-     char *new;
-     size_t len;
+enlist (char **cpp, char *new, size_t len)
 {
   int i, j;
 
@@ -2300,9 +2292,7 @@ enlist(cpp, new, len)
    list of their distinct common substrings. Return NULL if something
    seems wild. */
 static char **
-comsubs(left, right)
-     char *left;
-     char *right;
+comsubs (char *left, char *right)
 {
   char **cpp;
   char *lcp;
@@ -2336,9 +2326,7 @@ comsubs(left, right)
 }
 
 static char **
-addlists(old, new)
-char **old;
-char **new;
+addlists (char **old, char **new)
 {
   int i;
 
@@ -2356,9 +2344,7 @@ char **new;
 /* Given two lists of substrings, return a new list giving substrings
    common to both. */
 static char **
-inboth(left, right)
-     char **left;
-     char **right;
+inboth (char **left, char **right)
 {
   char **both;
   char **temp;
@@ -2399,16 +2385,14 @@ typedef struct
 } must;
 
 static void
-resetmust(mp)
-must *mp;
+resetmust (must *mp)
 {
   mp->left[0] = mp->right[0] = mp->is[0] = '\0';
   freelist(mp->in);
 }
 
 static void
-dfamust(dfa)
-struct dfa *dfa;
+dfamust (struct dfa *dfa)
 {
   must *musts;
   must *mp;
author	obrien <obrien@FreeBSD.org>	2001-11-02 21:34:12 +0000
committer	obrien <obrien@FreeBSD.org>	2001-11-02 21:34:12 +0000
commit	dc6f25b547f30f5e80e003f51948161c0cbe77d9 (patch)
tree	c71ed68868bfd0faea4346c572cafb502875beee /contrib/awk
parent	f99bc3baaa551ba49e40c2deb3b237bf89c299f0 (diff)
download	FreeBSD-src-dc6f25b547f30f5e80e003f51948161c0cbe77d9.zip FreeBSD-src-dc6f25b547f30f5e80e003f51948161c0cbe77d9.tar.gz