From 15169bd09781aa4d3f09d050d40b381b29c965a4 Mon Sep 17 00:00:00 2001 From: des Date: Tue, 19 Feb 2002 09:35:25 +0000 Subject: Vendor import of bwk's 2002-02-18 release. Most significant update is the inclusion of my character class patch. --- contrib/one-true-awk/FIXES | 32 +++++++++++++++++++++++++++++ contrib/one-true-awk/README | 5 ++--- contrib/one-true-awk/b.c | 49 +++++++++++++++++++++++++++++++++++++++++++-- contrib/one-true-awk/lex.c | 20 ++++++++++++++---- contrib/one-true-awk/main.c | 20 ++++-------------- contrib/one-true-awk/run.c | 21 +++++++++++++++++-- contrib/one-true-awk/tran.c | 5 ++++- 7 files changed, 124 insertions(+), 28 deletions(-) diff --git a/contrib/one-true-awk/FIXES b/contrib/one-true-awk/FIXES index 236323f..89826cc 100644 --- a/contrib/one-true-awk/FIXES +++ b/contrib/one-true-awk/FIXES @@ -25,6 +25,38 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +Feb 10, 2002: + changed types in posix chars structure to quiet solaris cc. + +Jan 1, 2002: + fflush() or fflush("") flushes all files and pipes. + + length(arrayname) returns number of elements; thanks to + arnold robbins for suggestion. + + added a makefile.win to make it easier to build on windows. + based on dan allen's buildwin.bat. + +Nov 16, 2001: + added support for posix character class names like [:digit:], + which are not exactly shorter than [0-9] and perhaps no more + portable. thanks to dag-erling smorgrav for code. + +Feb 16, 2001: + removed -m option; no longer needed, and it was actually + broken (noted thanks to volker kiefel). + +Feb 10, 2001: + fixed an appalling bug in gettok: any sequence of digits, +,-, E, e, + and period was accepted as a valid number if it started with a period. + this would never have happened with the lex version. + + other 1-character botches, now fixed, include a bare $ and a + bare " at the end of the input. 
+ +Feb 7, 2001: + more (const char *) casts in b.c and tran.c to silence warnings. + Nov 15, 2000: fixed a bug introduced in august 1997 that caused expressions like $f[1] to be syntax errors. thanks to arnold robbins for diff --git a/contrib/one-true-awk/README b/contrib/one-true-awk/README index bdb7e50..efcbfa5 100644 --- a/contrib/one-true-awk/README +++ b/contrib/one-true-awk/README @@ -68,7 +68,7 @@ compilers on a variety of systems, but new systems or compilers may raise some new complaint; reports of difficulties are welcome. -This also compiles with Visual C++ on Windows 95 and Windows NT, +This also compiles with Visual C++ on all flavors of Windows, *if* you provide versions of popen and pclose. The file missing95.c contains versions that can be used to get started with, though the underlying support has mysterious properties, @@ -76,8 +76,7 @@ the symptom of which can be truncated pipe output. Beware. This is also said to compile on Macintosh systems, using the file "buildmac" provided by Dan Allen (danallen@microsoft.com), -to whom many thanks. Dan also provided buildwin.bat, a simple -script for compiling on NT if you prefer. +to whom many thanks. The version of malloc that comes with some systems is sometimes astonishly slow. If awk seems slow, you might try fixing that. diff --git a/contrib/one-true-awk/b.c b/contrib/one-true-awk/b.c index 3153151..a9f01bf 100644 --- a/contrib/one-true-awk/b.c +++ b/contrib/one-true-awk/b.c @@ -93,7 +93,7 @@ fa *makedfa(char *s, int anchor) /* returns dfa for reg expr s */ return mkdfa(s, anchor); for (i = 0; i < nfatab; i++) /* is it there already? 
*/ if (fatab[i]->anchor == anchor - && strcmp(fatab[i]->restr, s) == 0) { + && strcmp((const char *) fatab[i]->restr, s) == 0) { fatab[i]->use = now++; return fatab[i]; } @@ -683,6 +683,37 @@ Node *unary(Node *np) } } +/* + * Character class definitions conformant to the POSIX locale as + * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source + * and operating character sets are both ASCII (ISO646) or supersets + * thereof. + * + * Note that to avoid overflowing the temporary buffer used in + * relex(), the expanded character class (prior to range expansion) + * must be less than twice the size of their full name. + */ +struct charclass { + const char *cc_name; + int cc_namelen; + const char *cc_expand; +} charclasses[] = { + { "alnum", 5, "0-9A-Za-z" }, + { "alpha", 5, "A-Za-z" }, + { "blank", 5, " \t" }, + { "cntrl", 5, "\001-\037\177" }, /* starts at \001, not \000: relex() copies cc_expand up to the first NUL, so a leading NUL would expand to nothing */ + { "digit", 5, "0-9" }, + { "graph", 5, "\041-\176" }, + { "lower", 5, "a-z" }, + { "print", 5, " \041-\176" }, + { "punct", 5, "\041-\057\072-\100\133-\140\173-\176" }, + { "space", 5, " \f\n\r\t\v" }, + { "upper", 5, "A-Z" }, + { "xdigit", 6, "0-9A-Fa-f" }, + { NULL, 0, NULL }, +}; + + int relex(void) /* lexical analyzer for reparse */ { int c, n; @@ -690,6 +721,8 @@ int relex(void) /* lexical analyzer for reparse */ static uschar *buf = 0; static int bufsz = 100; uschar *bp; + struct charclass *cc; + const uschar *p; switch (c = *prestr++) { case '|': return OR; @@ -719,7 +752,7 @@ int relex(void) /* lexical analyzer for reparse */ } else cflag = 0; - n = 2 * strlen(prestr)+1; + n = 2 * strlen((const char *) prestr)+1; if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) FATAL("out of space for reg expr %.10s...", lastre); for (; ; ) { @@ -730,6 +763,18 @@ int relex(void) /* lexical analyzer for reparse */ *bp++ = c; /* } else if (c == '\n') { */ /* FATAL("newline in character class %.20s...", lastre); */ + } else if (c == '[' && *prestr == ':') { + /* POSIX char class names, Dag-Erling Smorgrav, 
des@ofug.org */ + for (cc = charclasses; cc->cc_name; cc++) + if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) + break; + if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && + prestr[2 + cc->cc_namelen] == ']') { + prestr += cc->cc_namelen + 3; + for (p = (const uschar *) cc->cc_expand; *p; p++) + *bp++ = *p; + } else + *bp++ = c; } else if (c == '\0') { FATAL("nonterminated character class %.20s", lastre); } else if (bp == buf) { /* 1st char is special */ diff --git a/contrib/one-true-awk/lex.c b/contrib/one-true-awk/lex.c index a947109..2b16b25 100644 --- a/contrib/one-true-awk/lex.c +++ b/contrib/one-true-awk/lex.c @@ -105,7 +105,7 @@ int peek(void) int gettok(char **pbuf, int *psz) /* get next input token */ { - int c; + int c, retc; char *buf = *pbuf; int sz = *psz; char *bp = buf; @@ -133,6 +133,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */ } } *bp = 0; + retc = 'a'; /* alphanumeric */ } else { /* it's a number */ char *rem; /* read input until can't be a number */ @@ -151,11 +152,17 @@ int gettok(char **pbuf, int *psz) /* get next input token */ *bp = 0; strtod(buf, &rem); /* parse the number */ unputstr(rem); /* put rest back for later */ - rem[0] = 0; + if (rem == buf) { /* it wasn't a valid number at all */ + buf[1] = 0; /* so return one character as token */ + retc = buf[0]; /* character is its own type */ + } else { /* some prefix was a number */ + rem[0] = 0; /* so truncate where failure started */ + retc = '0'; /* number */ + } } *pbuf = buf; *psz = sz; - return buf[0]; + return retc; } int word(char *); @@ -186,7 +193,7 @@ int yylex(void) return 0; if (isalpha(c) || c == '_') return word(buf); - if (isdigit(c) || c == '.') { + if (isdigit(c)) { yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); /* should this also have STR set? 
*/ RET(NUMBER); @@ -311,6 +318,9 @@ int yylex(void) } yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); RET(IVAR); + } else if (c == 0) { /* */ + SYNTAX( "unexpected end of input after $" ); + RET(';'); } else { unputstr(buf); RET(INDIRECT); @@ -366,6 +376,8 @@ int string(void) case 0: SYNTAX( "non-terminated string %.10s...", buf ); lineno++; + if (c == 0) /* hopeless */ + FATAL( "giving up" ); break; case '\\': c = input(); diff --git a/contrib/one-true-awk/main.c b/contrib/one-true-awk/main.c index d807ca5..911b058 100644 --- a/contrib/one-true-awk/main.c +++ b/contrib/one-true-awk/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -char *version = "version 20001115"; +char *version = "version 20020101"; #define DEBUG #include @@ -52,8 +52,7 @@ int safe = 0; /* 1 => "safe" mode */ int main(int argc, char *argv[]) { - char *fs = NULL, *marg; - int temp; + char *fs = NULL; cmdname = argv[0]; if (argc == 1) { @@ -102,19 +101,8 @@ int main(int argc, char *argv[]) setclvar(argv[1]); break; case 'm': /* more memory: -mr=record, -mf=fields */ - /* no longer needed */ - marg = argv[1]; - if (argv[1][3]) - temp = atoi(&argv[1][3]); - else { - argv++; argc--; - temp = atoi(&argv[1][0]); - } - switch (marg[2]) { - case 'r': recsize = temp; break; - case 'f': nfields = temp; break; - default: FATAL("unknown option %s\n", marg); - } + /* no longer supported */ + WARNING("obsolete option %s ignored", argv[1]); break; case 'd': dbg = atoi(&argv[1][2]); diff --git a/contrib/one-true-awk/run.c b/contrib/one-true-awk/run.c index 2d71766..2f60a37 100644 --- a/contrib/one-true-awk/run.c +++ b/contrib/one-true-awk/run.c @@ -1448,13 +1448,18 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis char *p, *buf; Node *nextarg; FILE *fp; + void flush_all(void); t = ptoi(a[0]); x = execute(a[1]); nextarg = a[1]->nnext; switch (t) { case FLENGTH: - u = strlen(getsval(x)); break; + if (isarr(x)) + u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ + else + u = strlen(getsval(x)); + break; case FLOG: u = errcheck(log(getfval(x)), "log"); break; case FINT: @@ -1511,7 +1516,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis free(buf); return x; case FFLUSH: - if ((fp = openfile(FFLUSH, getsval(x))) == NULL) + if (isrec(x) || strlen(getsval(x)) == 0) { + flush_all(); /* fflush() or fflush("") -> all */ + u = 0; + } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL) u = EOF; else u = fflush(fp); @@ -1702,6 +1710,15 @@ void closeall(void) } } +void flush_all(void) /* fflush every files[] slot whose fp is set */ +{ + int i; + + for (i = 0; i < FOPEN_MAX; i++) + if (files[i].fp) + fflush(files[i].fp); +} + void backsub(char **pb_ptr, char **sptr_ptr); Cell *sub(Node **a, int nnn) /* substitute command */ diff --git a/contrib/one-true-awk/tran.c b/contrib/one-true-awk/tran.c index 8e0faf0..a44d572 100644 --- a/contrib/one-true-awk/tran.c +++ b/contrib/one-true-awk/tran.c @@ -170,9 +170,12 @@ void freesymtab(Cell *ap) /* free a symbol table */ xfree(cp->sval); temp = cp->cnext; /* avoids freeing then using */ free(cp); + tp->nelem--; } tp->tab[i] = 0; } + if (tp->nelem != 0) + WARNING("can't happen: inconsistent element count freeing %s", ap->nval); free(tp->tab); free(tp); } @@ -396,7 +399,7 @@ char *qstring(char *is, int delim) /* collect string up to next delim */ uschar *s = (uschar *) is; uschar *buf, *bp; - if ((buf = (uschar *) malloc(strlen(s)+3)) == NULL) + if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL) FATAL( "out of space in qstring(%s)", s); for (bp = buf; (c = *s) != delim; s++) { if (c == '\n') -- cgit v1.1