diff options
author | sheldonh <sheldonh@FreeBSD.org> | 1999-09-27 08:51:04 +0000 |
---|---|---|
committer | sheldonh <sheldonh@FreeBSD.org> | 1999-09-27 08:51:04 +0000 |
commit | b725e888cf822f521a9b7e26005b40c6d57beda4 (patch) | |
tree | 80d722fe88f0d4797e912358c9280a6b7186fc89 /contrib/awk | |
parent | a46c41193ff2573a4c910e19b570e9c253e714a1 (diff) | |
download | FreeBSD-src-b725e888cf822f521a9b7e26005b40c6d57beda4.zip FreeBSD-src-b725e888cf822f521a9b7e26005b40c6d57beda4.tar.gz |
Virgin import of GNU awk 3.0.4, which fixes at least a memory mis-
management problem involving custom print formats.
PR: 13615
Reported by: Scott Hazen Mueller <scott@zorch.sf-bay.org>
Diffstat (limited to 'contrib/awk')
-rw-r--r-- | contrib/awk/ChangeLog | 301 | ||||
-rw-r--r-- | contrib/awk/FUTURES | 2 | ||||
-rw-r--r-- | contrib/awk/NEWS | 76 | ||||
-rw-r--r-- | contrib/awk/PORTS | 57 | ||||
-rw-r--r-- | contrib/awk/POSIX.STD | 5 | ||||
-rw-r--r-- | contrib/awk/README | 8 | ||||
-rw-r--r-- | contrib/awk/README_d/README.FIRST | 2 | ||||
-rw-r--r-- | contrib/awk/acconfig.h | 5 | ||||
-rw-r--r-- | contrib/awk/array.c | 10 | ||||
-rw-r--r-- | contrib/awk/awk.h | 14 | ||||
-rw-r--r-- | contrib/awk/awk.y | 19 | ||||
-rw-r--r-- | contrib/awk/builtin.c | 109 | ||||
-rw-r--r-- | contrib/awk/custom.h | 11 | ||||
-rw-r--r-- | contrib/awk/doc/ChangeLog | 27 | ||||
-rw-r--r-- | contrib/awk/doc/awk.1 | 16 | ||||
-rw-r--r-- | contrib/awk/doc/gawk.texi | 222 | ||||
-rw-r--r-- | contrib/awk/eval.c | 76 | ||||
-rw-r--r-- | contrib/awk/field.c | 66 | ||||
-rw-r--r-- | contrib/awk/gawkmisc.c | 2 | ||||
-rw-r--r-- | contrib/awk/io.c | 103 | ||||
-rw-r--r-- | contrib/awk/main.c | 8 | ||||
-rw-r--r-- | contrib/awk/msg.c | 2 | ||||
-rw-r--r-- | contrib/awk/node.c | 31 | ||||
-rw-r--r-- | contrib/awk/patchlevel.h | 2 | ||||
-rw-r--r-- | contrib/awk/posix/ChangeLog | 4 |
25 files changed, 920 insertions, 258 deletions
diff --git a/contrib/awk/ChangeLog b/contrib/awk/ChangeLog index 0fa6515..8961fd5 100644 --- a/contrib/awk/ChangeLog +++ b/contrib/awk/ChangeLog @@ -1,3 +1,304 @@ +Wed Jun 30 16:14:36 1999 Arnold D. Robbins <arnold@gnu.org> + + * Release 3.0.4: Release tar file made. This time for sure. + +Wed Jun 30 16:10:11 1999 Arnold D. Robbins <arnold@gnu.org> + + * awk.h: add include of <assert.h>, and comment about config.h + having to be included before any system headers. Otherwise, + with egcs-2.91.66 and later on Linux systems, and possibly + others, things break badly, due to the LFS macros. + * awk.y, builtin.c, eval.c, field.c, io.c: removed include + of assert.h + +Wed Jun 9 11:39:19 1999 Paul Eggert <eggert@twinsun.com> + + Port the large-file code to AIX, HP-UX, and IRIX. + Add cross-compilation support for large files. + + * config.guess, config.sub: New files. + + * configure.in (AC_CANONICAL_HOST): + Add; GAWK_AC_SYS_LARGEFILE needs this. + (GAWK_AC_SYS_LARGEFILE): Renamed from GAWK_AC_LARGE_FILES. + + * aclocal.m4 (GAWK_AC_SYS_LARGEFILE): Renamed from GAWK_AC_LARGE_FILES. + Add support for AIX and HP-UX. + (GAWK_AC_SYS_LARGEFILE_FLAGS, GAWK_AC_SYS_LARGEFILE_SPACE_APPEND, + GAWK_AC_SYS_LARGEFILE_MACRO_VALUE): New macros. + + * acconfig.h (_FILE_OFFSET_BITS, _LARGEFILE_SOURCE, _LARGE_FILES): + New macros. + + * Makefile.in (MISC): add config.guess and config.sub so they get + included in the distribution. + +Wed Jun 9 11:29:29 1999 Paul Eggert <eggert@twinsun.com> + + * io.c (iop_alloc): Don't mmap files whose sizes don't fit in `int'. + [ This isn't really needed, as HAVE_MMAP is #undef'ed at the top, + but it's there in case people want to take their life in their hands. ] + +Sun Jun 6 11:28:07 1999 Arnold D. Robbins <arnold@gnu.org> + + * BETA Release 3.0.46: Release tar file made. + +Wed Jun 2 14:36:24 1999 Arnold D. Robbins <arnold@gnu.org> + + * PORTS: Updated with a more recent list of systems + that gawk compiles and tests ok on. 
+ +Tue Jun 1 14:24:59 1999 Arnold D. Robbins <arnold@gnu.org> + + * BETA Release 3.0.45: Release tar file made. + +Tue May 25 16:32:37 1999 Arnold D. Robbins <arnold@gnu.org> + + * builtin.c (format_tree): more smarts for weird cases, such as + zero precisions and zero values used with the `#' flag. + Thanks to Andreas Schwab (schwab@gnu.org) for pointing these out. + +Wed May 19 14:02:54 1999 Arnold D. Robbins <arnold@gnu.org> + + * io.c (do_close): move test for `close(FILENAME)' to after + loop through all open redirections. Fixes problems in obscure + cases with redirections in END rules. + +Sun May 16 14:08:39 1999 Arnold D. Robbins <arnold@gnu.org> + + * awk.y (yylex): fix group of characters including ',' to + set want_assign = FALSE. Fixes bizarre parsing problems in + function call lists, for example. + * io.c (get_a_record): repair logic for single-leading-newline + case. + +Tue May 11 16:48:11 1999 Arnold D. Robbins <arnold@gnu.org> + + * aclocal.m4 (GAWK_AC_AIX_TWEAK): new macro. + * configure.in: call it + * Makefile.in: (awklib/all): pass CFLAGS on to sub-make so + that password programs will get AIX magic defines. Avoids + having to tweak program code for those in doc/gawk.texi. + +Mon May 3 16:56:23 1999 Arnold D. Robbins <arnold@gnu.org> + + * array.c (do_delete): don't free_temp(subs) until after all + references to it are finished. + +Mon May 3 13:41:16 1999 Arnold D. Robbins <arnold@gnu.org> + + * BETA Release 3.0.44: Release tar file made. + +Sun May 2 18:25:43 1999 Arnold D. Robbins <arnold@gnu.org> + + * io.c (get_a_record): Do a really good job of stripping newlines + from the front of records when RS = "" and there's only one + newline at the front of the file, which the regex didn't catch. + +Wed Apr 28 12:27:49 1999 Arnold D. Robbins <arnold@gnu.org> + + * configure.in: more HP stuff: fix the manual alloca code so that + gawk will compile and link on HP systems. See the comments. + +Sun Apr 25 13:39:16 1999 Arnold D. 
Robbins <arnold@gnu.org> + + * Makefile.in (gawk): add $(CFLAGS) to linking step. + * configure.in: correctly do AC_FUNC_GETPGRP on HP systems too. + +Tue Apr 13 20:21:00 1999 Arnold D. Robbins <arnold@gnu.org> + + * BETA Release 3.0.43: Release tar file made. + +Tue Apr 13 19:02:20 1999 Arnold D. Robbins <arnold@gnu.org> + + * io.c (useropen, pidopen): add casts to int on arguments to + silence gcc warnings. + * regex.c (regcomp,regexec,regfree): add ifdef for APPLE. + +Thu Feb 4 10:38:02 1999 Arnold D. Robbins <arnold@gnu.org> + + * custom.h: hacks for BeOS. Not documented in the manual right now. + * configure.in: hacks for BeOS. Check for HP-UX and define C_ALLOCA + if not using gcc. I wish they'd just fix bison already. + +Sun Dec 20 16:57:38 1998 Arnold D. Robbins <arnold@gnu.org> + + * BETA Release 3.0.42: Release tar file made. + +Sun Nov 15 21:05:39 1998 Arnold D. Robbins <arnold@gnu.org> + + * io.c (gawk_popen): Add WIN32 to list of systems that use + the non-real-pipe version. From the PC gawk guys. + +Wed Nov 4 11:32:24 1998 Arnold D. Robbins <arnold@gnu.org> + + * BETA Release 3.0.41: Release tar file made. + +Tue Nov 3 16:24:35 1998 Arnold D. Robbins <arnold@gnu.org> + + * eval.c (r_get_lhs): Fix the cases for the special variables, + don't unref their current value if it's the same as the internal + copy; perhaps the current one is used in a concatenation or some + other expression somewhere higher up in the call chain. Ouch. + See test/getnr2tm.awk. + +Sun Nov 1 15:24:52 1998 Arnold D. Robbins <arnold@gnu.org> + + * builtin.c (format_tree): improve handling of zero-fill + when a precision is present. See test/zeroflag.awk. + +Wed Oct 28 20:40:17 1998 Arnold D. Robbins <arnold@gnu.org> + + * eval.c (r_tree_eval): Case for Node_concat. Get lengths + separately, in case one expression has a side effect that + changes another. Ugly, but it keeps gawk from core + dumping. See test/nasty.awk. + +Sun Oct 18 21:27:24 1998 Arnold D. 
Robbins <arnold@gnu.org> + + * awk.y (append_right): bug fix, if `list' or `new' are NULL, + return `list', so that things don't break too badly. + * regex.c (re_compile_fastmap): remove unused variable `num_regs'. + +Thu Oct 8 19:36:57 1998 Arnold D. Robbins <arnold@gnu.org> + + * BETA Release 3.0.40: Release tar file made. + +Mon Jul 27 10:14:33 1998 Arnold D. Robbins <arnold@gnu.org> + + * node.c (parse_escape): Remove assignment with side effects + from ISXDIGIT test. Thanks to "Mihai T. LAZARESCU" + <mihai@ccmserv.polito.it> for pointing this out. + +Mon Apr 27 11:31:32 1998 Arnold D. Robbins <arnold@gnu.org> + + * main.c (usage): fix the email address for the bug list. + (copyleft): update the copyright year. + +Mon Mar 23 21:22:32 1998 Arnold D. Robbins <arnold@gnu.org> + + * eval.c (r_get_lhs): make sure that values of type + Node_param_list don't have the FUNC flag set. This means + we don't allow the use of a function name as a variable or + array from within the function. + +Sun Mar 22 19:12:32 1998 Paul Eggert <eggert@twinsun.com> + + * aclocal.m4 (GAWK_AC_LARGE_FILES): new macro that checks for + large file support, and updates CPPFLAGS, LDFLAGS, LIBS as + needed. + * configure.in: call GAWK_AC_LARGE_FILES. + * Makefile.in (CPPFLAGS, LDFLAGS): Let autoconf configure. + (COMPFLAGS): Add $(CPPFLAGS). + +Mon Mar 16 14:06:41 1998 Arnold D. Robbins <arnold@gnu.org> + + * field.c (using_FIELDWIDTHS): new macro. + (using_fieldwidths): use new macro. + (do_split): in case for FS_DFLT, also check that + we're not using FIELDWIDTHS. Otherwise, split() would use + FIELDWIDTHS, not current value of FS. Oops. + +Sun Nov 16 20:08:59 1997 Arnold D. Robbins <arnold@gnu.org> + + * builtin.c (sub_common): fix for count of matches in gsub + from Geert.Debyser@esat.kuleuven.ac.be. + +Wed Oct 15 03:38:12 1997 Arnold D. Robbins <arnold@gnu.org> + + * field.c (set_FS): Use `sc_parsefield' if the value of FS is not + alphabetic OR if not ignoring case. 
Bug fix if IGNORECASE + is true and FS happens to be '^'. Sheesh, talk about obscure. + (rebuild_record): Add more smarts to the code that sets up the + fields. Thanks to Alan J. Broder (ajb@dtmr.com). + +Sun Oct 5 11:56:52 1997 Arnold D. Robbins <arnold@gnu.org> + + * configure.in: if ISC add -D_SYSV3 to CFLAGS, per email from + Mario Vanoni (vanonim@dial.eunet.ch). + +Fri Sep 26 00:57:49 1997 Arnold D. Robbins <arnold@gnu.org> + + * awk.y (append_right): return if either list is NULL. Prevents + syntax errors from causing core dumps. + +Wed Sep 17 15:34:15 1997 Arnold D. Robbins <arnold@gnu.org> + + * field.c (rebuild_record): set things up so that all fields point + into the new record and release any changed fields without + causing memory leaks. Avoids problems when fields are extended + with the value of $0 or other fields and then $0 is assigned to. + +Mon Sep 15 16:12:55 1997 Arnold D. Robbins <arnold@gnu.org> + + * builtin.c (do_print): when testing for NUMBER, make sure + it's not a string too. Thanks to Michael Brennan for + clarifying the semantics. + +Sun Sep 14 19:55:12 1997 Arnold D. Robbins <arnold@gnu.org> + + * node.c (format_val): always format values ourselves: avoids + problems if OFMT is bizarre, like %s. + +Sun Sep 14 00:08:53 1997 Arnold D. Robbins <arnold@gnu.org> + + * io.c (get_a_record): replace all occurrences of the test + `grRS == FALSE' with `RS_is_null' which makes ` RS = "\0" ' + actually work, is clearer code, and actually makes use of + the `RS_is_null' variable! + +Sun Aug 17 07:15:12 1997 Arnold D. Robbins <arnold@gnu.org> + + * field.c (set_FS): Change logic to always set parse_field, even + if FS hasn't changed. Thanks to Igor Sheyn for catching this. + +Wed Aug 6 21:04:37 1997 Arnold D. Robbins <arnold@gnu.org> + + * io.c (VMS et al gawk_popen): use pclose, not fclose, if + iop_alloc fails. + +Wed Jul 30 19:53:52 1997 Arnold D. Robbins <arnold@gnu.org> + + * awk.y [variable]: fix case for subscript if $3 == NULL. 
+ +Sun Jul 27 22:47:30 1997 Arnold D. Robbins <arnold@gnu.org> + + * awk.y (get_src_buf): don't close file if it's stdin. + +Sun Jul 27 22:47:15 1997 Pat Rankin <rankin@eql.caltech.edu> + + * io.c (#if VMS: vmsrtl_fileno): new routine. + (#if VMS: fileno): new macro substituted for stdio one. + +Thu Jul 17 20:05:59 1997 Arnold D. Robbins <arnold@gnu.org> + + * builtin.c (do_print): When OFMT != CONVFMT, create a new + temporary node with just the numeric value valid and format it, + and use that for printing. Avoids memory corruption. + +Wed Jul 16 10:01:16 1997 Arnold D. Robbins <arnold@gnu.org> + + * regex.c: When SYNTAX_TABLE is defined, but not emacs, then + CHAR_SET_SIZE is not defined, though used in regcomp. It should + be taken out of #ifdef SYNTAX_TABLE. Fix from bug group, from + Akim Demaille, demaille@inf.enst.fr. + * awk.h (isnondecimal): make test a little smarter. + builtin.c (nondec2awknum): add bailout for decimal numbers, e.g. + `00.1'. Fix from Larry Schwimmer <rosebud@cyclone.Stanford.EDU>. + +Thu Jun 19 19:00:40 1997 Arnold D. Robbins <arnold@gnu.org> + + * eval.c (interpret): case Node_K_next, Node_K_nextfile: fatal + error if called from BEGIN or END. + (Fixed completely Mon May 3 13:31:42 1999.) + +Mon Jun 9 22:40:04 1997 Arnold D. Robbins <arnold@gnu.org> + + * builtin.c (nondec2awknum): Allow `f' and `F' in hexadecimal numbers. + Gotta get more sleep... + * array.c (assoc_lookup): Fix from Tom Karzes (karzes@equator.com) + for memory leak when forcing type to Node_var_array. + Thu May 15 12:49:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us> * Release 3.0.3: Release tar file made. diff --git a/contrib/awk/FUTURES b/contrib/awk/FUTURES index 13a312c..829f168 100644 --- a/contrib/awk/FUTURES +++ b/contrib/awk/FUTURES @@ -73,8 +73,6 @@ In 3.1 Use GNU malloc. - Use rx instead of regex. - DONE: Do a reference card. ? 
Have strftime() pay attention to the value of ENVIRON["TZ"] diff --git a/contrib/awk/NEWS b/contrib/awk/NEWS index 2a3e7fe..2a3ae4b 100644 --- a/contrib/awk/NEWS +++ b/contrib/awk/NEWS @@ -1,3 +1,79 @@ +Changes from 3.0.3 to 3.0.4 +--------------------------- + +This is a bug fix release only, pending further development on 3.1.0. + +Bugs Fixed: + + 1. A memory leak when turning a function parameter into an array was + fixed. + + 2. The non-decimal data option now works correctly. + + 3. Using an empty pair of brackets as an array subscript no longer causes + a core dump during parsing. In general, syntax errors should not + cause core dumps any more. + + 4. Standard input is no longer closed if it provides program source, + avoiding strange I/O problems. + + 5. Memory corruption during printing with `print' has been fixed. + + 6. The gsub function now correctly counts the number of matches. + + 7. A typo in doc/Makefile.in has been fixed, making installation work. + + 8. Calling `next' or `nextfile' from a BEGIN or END rule is now fatal. + + 9. Subtle problems in rebuilding $0 when fields were changed have been + fixed. + +10. `FS = FS' now correctly turns off the use of FIELDWIDTHS. + +11. Gawk now parses fields correctly when FS is a single character. + +12. It is now possible for RS to be the NUL character ("\0"). + +13. Weird problems with number conversions on MIPS and other systems + have been fixed. + +14. When parsing using FIELDWIDTHS is in effect, split() with no third + argument will still use the value of FS. + +15. Large File Support for Solaris, HP-UX, AIX, and IRIX is now enabled at + compile time, thanks to Paul Eggert. + +16. Attempting to use the name of a function as a variable or array + from within the function is now caught as a fatal error, instead + of as a core dump. + +17. A bug in parsing hex escapes was fixed. + +18. A weird bug with concatenation where one expression has side effects + that changes another was fixed. + +19. 
printf/sprintf now behave much better for uses of the '0' and '#' flags + and with precisions and field widths. + +20. Further strangenesses with concatenation and multiple accesses of some + of the special variables was fixed. + +21. The Atari port is marked as no longer supported. + +22. Build problems on HP-UX have been fixed. + +23. Minor fixes and additional explanations added to the documentation. + +24. For RS = "", even a single leading newline is now correctly stripped. + +25. Obscure parsing problems for regex constants like /=.../ fixed, so + that a regex constant is recognized, and not the /= operator. + +26. Fixed a bug when closing a redirection that matched the current + or last FILENAME. + +27. Build problems on AIX fixed. + Changes from 3.0.2 to 3.0.3 --------------------------- diff --git a/contrib/awk/PORTS b/contrib/awk/PORTS index c6cbb83..7f30f70 100644 --- a/contrib/awk/PORTS +++ b/contrib/awk/PORTS @@ -1,36 +1,27 @@ -A recent version of gawk has been successfully compiled and run "make test" +Gawk 3.0.4 has been successfully compiled and run "make test" on the following: -Using cc: - Dec Alpha OSF 4.0 - HP9000/755 HP-UX 9.01 - IBM PowerPC AIX 4.1.4.0 - SCO Unix (OpenServer 5) - SGI IRIX 4.0.5 - SGI IRIX 5.3 - SGI IRIX 6.1 - SGI IRIX 6.2 - SunOS 4.1.3 - SunOS 5.5 - IBM SP2 AIX 4.1 +Linux 2.2.5 gcc 2.7.2.3 +Linux 2.0.33 gcc 2.7.2.1 +IRIX64 6.4 gcc 2.8.1 +IRIX 5.3 gcc 2.7.2.2 +UNIX_SV maxion OS 4.2MP gcc 2.7.2 +IRIX 6.2 gcc 2.7.2.2 +CYGWIN_95-4.0 20.1 (0.3/1/1) egcs-2.91.66 (has minor problems due to env.) 
-Other systems: - DEC Alpha Linux/AXP - DEC Alpha OSF/1 3.2 - DECstation 5000 ULTRIX 4.3 - HP 9000/735 HP-UX 10.01 - IBM RS/6000 AIX 3.2 - IBM SP2 AIX 4.1 - Intel x86 DOS (compiler: djgpp v2, emx+gcc, - and MSC 6.00A, 7, and 8) - Intel x86 Linux 2.0.27 - Intel x86 Linux 2.1.36 - Intel x86 OS+2 (compiler: emx+gcc) - NeXT Turbostation Mach 3.3 - SGI Indigo/2 IRIX 5.3 - SGI O2 IRIX 6.2 - SGI PowerChallenge IRIX 6.1 - Sun SPARC Linux 2.0.22 - Sun SPARC Solaris 2.5 - Sun SPARC Solaris 2.5.1 - Sun SPARC SunOS 4.1.3 +The builds of gawk-3.0.45, and validation and installation, were +successful on these systems: + + DEC Alpha OSF/1 3.2 + HP 9000/735 HP-UX 10.01 + IBM PowerPC AIX 4.2 + Intel Pentium II MMX GNU/Linux 2.0.35 + NeXT Turbostation Mach 3.3 + SGI Indigo/2 IRIX 5.3 + SGI O2 R10000-SC IRIX 6.3 + Sun SPARC Solaris 2.6 + +On + SGI Origin 200 IRIX 6.4 +a build with gcc-2.8.1 succeeded, but several tests failed; a rebuild +with c89 fixed the problem. diff --git a/contrib/awk/POSIX.STD b/contrib/awk/POSIX.STD index ac8e1ab..05129b6 100644 --- a/contrib/awk/POSIX.STD +++ b/contrib/awk/POSIX.STD @@ -1,3 +1,8 @@ +October 1998: + +The 1003.2 work has been at a stand-still for ages. Who knows if or +when a new revision will actually happen... + August 1995: Although the published 1003.2 standard contained the incorrect diff --git a/contrib/awk/README b/contrib/awk/README index 890b16d..49102c5 100644 --- a/contrib/awk/README +++ b/contrib/awk/README @@ -1,10 +1,10 @@ README: -This is GNU Awk 3.0.3. It should be upwardly compatible with the Bell +This is GNU Awk 3.0.4. It should be upwardly compatible with the Bell Labs research version of awk. It is almost completely compliant with the 1993 POSIX 1003.2 standard for awk. (See the note below about POSIX.) -Patches 1 through 3 just fix bugs -- see NEWS and ChangeLog for details. +Patches 1 through 4 just fix bugs -- see NEWS and ChangeLog for details. See the file INSTALL for installation instructions. 
@@ -66,7 +66,7 @@ about the worst place to post a gawk bug report. Please, use the mechanisms outlined in the manual. Arnold Robbins -INTERNET: arnold@gnu.ai.mit.edu +INTERNET: arnold@gnu.org BUG REPORTS AND FIXES (non-Unix ports): @@ -81,7 +81,7 @@ VMS: Pat Rankin rankin@eql.caltech.edu -Atari ST: +Alpha/Linux: Michal Jaegermann michal@gortel.phys.ualberta.ca diff --git a/contrib/awk/README_d/README.FIRST b/contrib/awk/README_d/README.FIRST index 2ebd5b7..ef527f2 100644 --- a/contrib/awk/README_d/README.FIRST +++ b/contrib/awk/README_d/README.FIRST @@ -18,4 +18,4 @@ If you send me email about this, without having read this file, I will yell at you. Arnold Robbins -arnold@gnu.ai.mit.edu +arnold@gnu.org diff --git a/contrib/awk/acconfig.h b/contrib/awk/acconfig.h index 05f3c61..db80a4c 100644 --- a/contrib/awk/acconfig.h +++ b/contrib/awk/acconfig.h @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1995-1997 the Free Software Foundation, Inc. + * Copyright (C) 1995-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -30,6 +30,9 @@ #undef SPRINTF_RET /* return type of sprintf */ #undef BITOPS /* bitwise ops (undocumented feature) */ #undef NONDECDATA /* non-decimal input data (undocumented feature) */ +#undef _FILE_OFFSET_BITS /* bits in a file offset, where this matters */ +#undef _LARGEFILE_SOURCE /* makes fseeko etc. visible on some hosts */ +#undef _LARGE_FILES /* enables large files on AIX-style hosts */ @BOTTOM@ diff --git a/contrib/awk/array.c b/contrib/awk/array.c index b178cd2..4906384 100644 --- a/contrib/awk/array.c +++ b/contrib/awk/array.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991 - 97 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -272,7 +272,10 @@ NODE *symbol, *subs; fatal("attempt to use scalar as array"); if (symbol->var_array == NULL) { - symbol->type = Node_var_array; + if (symbol->type != Node_var_array) { + unref(symbol->var_value); + symbol->type = Node_var_array; + } symbol->array_size = symbol->table_size = 0; /* sanity */ symbol->flags &= ~ARRAYMAXED; grow_table(symbol); @@ -360,13 +363,14 @@ NODE *symbol, *tree; last = bucket, bucket = bucket->ahnext) if (cmp_nodes(bucket->ahname, subs) == 0) break; - free_temp(subs); if (bucket == NULL) { if (do_lint) warning("delete: index `%s' not in array `%s'", subs->stptr, symbol->vname); + free_temp(subs); return; } + free_temp(subs); if (last != NULL) last->ahnext = bucket->ahnext; else diff --git a/contrib/awk/awk.h b/contrib/awk/awk.h index 630144d..807f700 100644 --- a/contrib/awk/awk.h +++ b/contrib/awk/awk.h @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -25,6 +25,14 @@ /* ------------------------------ Includes ------------------------------ */ +/* + * config.h absolutely, positively, *M*U*S*T* be included before + * any system headers. Otherwise, extreme death, destruction + * and loss of life results. + * + * Well, OK, gawk just won't work on systems using egcs and LFS. But + * that's almost as bad. 
+ */ #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -34,6 +42,7 @@ #endif /* _GNU_SOURCE */ #include <stdio.h> +#include <assert.h> #ifdef HAVE_LIMITS_H #include <limits.h> #endif /* HAVE_LIMITS_H */ @@ -584,7 +593,8 @@ extern char casetable[]; /* for case-independent regexp matching */ /* ------------------------- Pseudo-functions ------------------------- */ #define is_identchar(c) (isalnum(c) || (c) == '_') -#define isnondecimal(str) (((str)[0]) == '0') +#define isnondecimal(str) (((str)[0]) == '0' && (ISDIGIT((str)[1]) \ + || (str)[1] == 'x' || (str)[1] == 'X')) #ifdef MPROF #define getnode(n) emalloc(n, NODE *, sizeof(NODE), "getnode") diff --git a/contrib/awk/awk.y b/contrib/awk/awk.y index 1b9a89b..93959e8 100644 --- a/contrib/awk/awk.y +++ b/contrib/awk/awk.y @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -830,7 +830,9 @@ variable { $$ = variable($1, CAN_FREE, Node_var); } | NAME '[' expression_list ']' { - if ($3->rnode == NULL) { + if ($3 == NULL) { + fatal("invalid subscript expression"); + } else if ($3->rnode == NULL) { $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode); freenode($3); } else @@ -1169,7 +1171,8 @@ again: warning("source file `%s' is empty", source); } } - close(fd); + if (fileno(stdin) != fd) /* safety */ + close(fd); samefile = FALSE; nextfile++; if (lexeme) @@ -1451,14 +1454,17 @@ retry: case ':': case '?': allow_newline(); - /* fall through */ + return lasttok = c; + case ')': case ']': case '(': - case '[': case ';': case '{': case ',': + want_assign = FALSE; + /* fall through */ + case '[': return lasttok = c; case '*': @@ -2109,6 +2115,9 @@ NODE *list, *new; register NODE *oldlist; static NODE *savefront = NULL, *savetail = NULL; + if (list == NULL || new == NULL) + return list; + oldlist = list; if (savefront == oldlist) { savetail = savetail->rnode = new; diff --git a/contrib/awk/builtin.c b/contrib/awk/builtin.c index 0686041..a4e5a08 100644 --- a/contrib/awk/builtin.c +++ b/contrib/awk/builtin.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -25,7 +25,6 @@ #include "awk.h" -#include <assert.h> #undef HUGE #undef CHARBITS #undef INTBITS @@ -408,6 +407,7 @@ register NODE *carg; double tmpval; char signchar = FALSE; size_t len; + int zero_flag = FALSE; static char sp[] = " "; static char zero_string[] = "0"; static char lchbuf[] = "0123456789abcdef"; @@ -435,6 +435,7 @@ register NODE *carg; prec = 0; have_prec = FALSE; signchar = FALSE; + zero_flag = FALSE; lj = alt = big = bigbig = small = FALSE; fill = sp; cp = cend; @@ -458,10 +459,9 @@ check_pos: break; case '0': + zero_flag = TRUE; if (lj) goto retry; - if (cur == &fw) - fill = zero_string; /* FALL through */ case '1': case '2': @@ -585,6 +585,8 @@ check_pos: goto retry; case 'c': need_format = FALSE; + if (zero_flag && ! lj) + fill = zero_string; parse_next_arg(); /* user input that looks numeric is numeric */ if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM) @@ -609,6 +611,8 @@ check_pos: goto pr_tail; case 's': need_format = FALSE; + if (zero_flag && ! lj) + fill = zero_string; parse_next_arg(); arg = force_string(arg); if (! have_prec || prec > arg->stlen) @@ -620,6 +624,14 @@ check_pos: need_format = FALSE; parse_next_arg(); tmpval = force_number(arg); + + /* + * ``The result of converting a zero value with a + * precision of zero is no characters.'' + */ + if (have_prec && prec == 0 && tmpval == 0) + goto pr_tail; + if (tmpval < 0) { if (tmpval < LONG_MIN) goto out_of_range; @@ -637,17 +649,28 @@ check_pos: *--cp = (char) ('0' + uval % 10); uval /= 10; } while (uval > 0); + + /* add more output digits to match the precision */ + if (have_prec) { + while (cend - cp < prec) + *--cp = '0'; + } + if (sgn) *--cp = '-'; else if (signchar) *--cp = signchar; /* - * precision overrides '0' flags. however, for - * integer formats, precsion is minimum number of - * *digits*, not characters, thus we want to fill - * with zeroes. + * When to fill with zeroes is of course not simple. + * First: No zero fill if left-justifying. 
+ * Next: There seem to be two cases: + * A '0' without a precision, e.g. %06d + * A precision with no field width, e.g. %.10d + * Any other case, we don't want to fill with zeroes. */ - if (have_prec) + if (! lj + && ((zero_flag && ! have_prec) + || (fw == 0 && have_prec))) fill = zero_string; if (prec > fw) fw = prec; @@ -671,6 +694,22 @@ check_pos: need_format = FALSE; parse_next_arg(); tmpval = force_number(arg); + + /* + * ``The result of converting a zero value with a + * precision of zero is no characters.'' + * + * If I remember the ANSI C standard, though, + * it says that for octal conversions + * the precision is artificially increased + * to add an extra 0 if # is supplied. + * Indeed, in C, + * printf("%#.0o\n", 0); + * prints a single 0. + */ + if (! alt && have_prec && prec == 0 && tmpval == 0) + goto pr_tail; + if (tmpval < 0) { if (tmpval < LONG_MIN) goto out_of_range; @@ -683,18 +722,29 @@ check_pos: uval = (unsigned long) tmpval; } /* - * precision overrides '0' flags. however, for - * integer formats, precsion is minimum number of - * *digits*, not characters, thus we want to fill - * with zeroes. + * When to fill with zeroes is of course not simple. + * First: No zero fill if left-justifying. + * Next: There seem to be two cases: + * A '0' without a precision, e.g. %06d + * A precision with no field width, e.g. %.10d + * Any other case, we don't want to fill with zeroes. */ - if (have_prec) + if (! lj + && ((zero_flag && ! 
have_prec) + || (fw == 0 && have_prec))) fill = zero_string; do { *--cp = chbuf[uval % base]; uval /= base; } while (uval > 0); - if (alt) { + + /* add more output digits to match the precision */ + if (have_prec) { + while (cend - cp < prec) + *--cp = '0'; + } + + if (alt && tmpval != 0) { if (base == 16) { *--cp = cs1; *--cp = '0'; @@ -753,7 +803,7 @@ check_pos: *cp++ = signchar; if (alt) *cp++ = '#'; - if (fill != sp) + if (zero_flag) *cp++ = '0'; cp = strcpy(cp, "*.*") + 3; *cp++ = cs1; @@ -1082,6 +1132,7 @@ register NODE *tree; register FILE *fp; int numnodes, i; NODE *save; + NODE *tval; if (tree->rnode) { int errflg; /* not used, sigh */ @@ -1115,25 +1166,29 @@ register NODE *tree; t[i] = dupnode(n); free_temp(n); - if (t[i]->flags & NUMBER) { + if ((t[i]->flags & (NUMBER|STRING)) == NUMBER) { if (OFMTidx == CONVFMTidx) (void) force_string(t[i]); - else - t[i] = format_val(OFMT, OFMTidx, t[i]); + else { + tval = tmp_number(t[i]->numbr); + unref(t[i]); + t[i] = format_val(OFMT, OFMTidx, tval); + } } } for (i = 0; i < numnodes; i++) { efwrite(t[i]->stptr, sizeof(char), t[i]->stlen, fp, "print", rp, FALSE); unref(t[i]); - if (i != numnodes - 1) { - if (OFSlen > 0) - efwrite(OFS, sizeof(char), (size_t) OFSlen, - fp, "print", rp, FALSE); - } + + if (i != numnodes - 1 && OFSlen > 0) + efwrite(OFS, sizeof(char), (size_t) OFSlen, + fp, "print", rp, FALSE); + } if (ORSlen > 0) efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE); + free(t); } @@ -1506,6 +1561,7 @@ int how_many, backdigs; */ if (lastmatchnonzero && matchstart == matchend) { lastmatchnonzero = FALSE; + matches--; goto empty; } /* @@ -2016,6 +2072,7 @@ size_t len; case 'c': case 'd': case 'e': + case 'f': val = *str - 'a' + 10; break; case 'A': @@ -2023,6 +2080,7 @@ size_t len; case 'C': case 'D': case 'E': + case 'F': val = *str - 'A' + 10; break; default: @@ -2033,11 +2091,12 @@ size_t len; } else if (*str == '0') { for (; len > 0; len--) { if (! 
isdigit(*str) || *str == '8' || *str == '9') - goto done; + goto decimal; retval = (retval * 8) + (*str - '0'); str++; } } else { +decimal: save = str[len]; retval = atof(str); str[len] = save; diff --git a/contrib/awk/custom.h b/contrib/awk/custom.h index 833bb62..9bc914f 100644 --- a/contrib/awk/custom.h +++ b/contrib/awk/custom.h @@ -7,11 +7,11 @@ * information. * * If you make additions to this file for your system, please send me - * the information, to arnold@gnu.ai.mit.edu. + * the information, to arnold@gnu.org. */ /* - * Copyright (C) 1995-1997 the Free Software Foundation, Inc. + * Copyright (C) 1995-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -57,3 +57,10 @@ #ifdef _SEQUENT_ #undef HAVE_MMAP #endif + +/* For BeOS, from mc@whoever.com */ +#if defined(__dest_os) && __dest_os == __be_os +#define BROKEN_STRNCASECMP +#define ELIDE_CODE +#include <alloca.h> +#endif diff --git a/contrib/awk/doc/ChangeLog b/contrib/awk/doc/ChangeLog index 660436a..17f2844 100644 --- a/contrib/awk/doc/ChangeLog +++ b/contrib/awk/doc/ChangeLog @@ -1,3 +1,30 @@ +Wed Jun 30 16:14:36 1999 Arnold D. Robbins <arnold@gnu.org> + + * Release 3.0.4: Release tar file made. This time for sure. + +Wed Oct 7 21:59:33 1998 Arnold D. Robbins <arnold@gnu.org> + + * texinfo.tex: Updated to version 2.227, from Texinfo 3.12. + +Sun Oct 19 12:26:08 1997 Arnold D. Robbins <arnold@gnu.org> + + * ALL: change references to arnold@gnu.ai.mit.edu to arnold@gnu.org. + +Tue Sep 23 10:31:17 1997 Arnold D. Robbins <arnold@gnu.org> + + * texinfo.tex: Updated to version 2.218, from Texinfo 3.11. + +Fri Jul 4 08:19:00 1997 Arnold D. Robbins <arnold@gnu.org> + + * Makefile.in ($(infodir)/gawk.info): Don't make dependent upon + gawk.info, in case installed one is newer. Instead, check that + an installed gawk.info exists and is identical to current one. + If so, just exit; otherwise do the install. 
+ +Wed Jul 2 14:55:12 1997 Arnold D. Robbins <arnold@gnu.org> + + * Makefile.in ($(infodir)/gawk.info): typo fix. + Thu May 15 12:49:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us> * Release 3.0.3: Release tar file made. diff --git a/contrib/awk/doc/awk.1 b/contrib/awk/doc/awk.1 index 0568c16..f15d4f1 100644 --- a/contrib/awk/doc/awk.1 +++ b/contrib/awk/doc/awk.1 @@ -1,7 +1,7 @@ .ds PX \s-1POSIX\s+1 .ds UX \s-1UNIX\s+1 .ds AN \s-1ANSI\s+1 -.TH GAWK 1 "Dec 19 1996" "Free Software Foundation" "Utility Commands" +.TH GAWK 1 "Apr 28 1999" "Free Software Foundation" "Utility Commands" .SH NAME gawk \- pattern scanning and processing language .SH SYNOPSIS @@ -2150,6 +2150,12 @@ Functions may call each other and may be recursive. Function parameters used as local variables are initialized to the null string and the number zero upon function invocation. .PP +Use +.BI return " expr" +to return a value from a function. The return value is undefined if no +value is provided, or if the function returns by ``falling off'' the +end. +.PP If .B \-\^\-lint has been provided, @@ -2535,7 +2541,7 @@ and the effort to do so really is not worth it. .SH VERSION INFORMATION This man page documents .IR gawk , -version 3.0.2. +version 3.0.4. .SH AUTHORS The original version of \*(UX .I awk @@ -2566,10 +2572,10 @@ help from Darrel Hankerson. Fred Fish supplied support for the Amiga. If you find a bug in .IR gawk , please send electronic mail to -.BR bug-gnu-utils@prep.ai.mit.edu , +.BR bug-gnu-utils@gnu.org , .I with a carbon copy to -.BR arnold@gnu.ai.mit.edu . +.BR arnold@gnu.org . Please include your operating system and its revision, the version of .IR gawk , what C compiler you used to compile it, and a test program @@ -2598,7 +2604,7 @@ Brian Kernighan of Bell Labs provided valuable assistance during testing and debugging. We thank him. .SH COPYING PERMISSIONS -Copyright \(co) 1996 Free Software Foundation, Inc. +Copyright \(co) 1996,97,98,99 Free Software Foundation, Inc. 
.PP Permission is granted to make and distribute verbatim copies of this manual page provided the copyright notice and this permission diff --git a/contrib/awk/doc/gawk.texi b/contrib/awk/doc/gawk.texi index 8c2aad2..3e8e102 100644 --- a/contrib/awk/doc/gawk.texi +++ b/contrib/awk/doc/gawk.texi @@ -21,10 +21,10 @@ @c applies to, and when the document was updated. @set TITLE Effective AWK Programming @set SUBTITLE A User's Guide for GNU Awk -@set PATCHLEVEL 3 +@set PATCHLEVEL 4 @set EDITION 1.0.@value{PATCHLEVEL} @set VERSION 3.0 -@set UPDATE-MONTH February 1997 +@set UPDATE-MONTH April, 1999 @iftex @set DOCUMENT book @end iftex @@ -74,7 +74,7 @@ particular records in a file and perform operations upon them. This is Edition @value{EDITION} of @cite{@value{TITLE}}, for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation of AWK. -Copyright (C) 1989, 1991, 92, 93, 96, 97 Free Software Foundation, Inc. +Copyright (C) 1989, 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -138,7 +138,7 @@ Corporation. @* Registered Trademark of Paramount Pictures Corporation. @* @c sorry, i couldn't resist @sp 3 -Copyright @copyright{} 1989, 1991, 92, 93, 96, 97 Free Software Foundation, Inc. +Copyright @copyright{} 1989, 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. 
@sp 2 This is Edition @value{EDITION} of @cite{@value{TITLE}}, @* @@ -153,16 +153,16 @@ for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU impleme @item Seattle, WA 98155 USA @tab Boston, MA 02111-1307 USA @item Phone: +1-206-782-7733 @tab Phone: +1-617-542-5942 @item Fax: +1-206-782-7191 @tab Fax: +1-617-542-2652 -@item E-mail: @code{sales@@ssc.com} @tab E-mail: @code{gnu@@prep.ai.mit.edu} +@item E-mail: @code{sales@@ssc.com} @tab E-mail: @code{gnu@@gnu.org} @item URL: @code{http://www.ssc.com/} @tab URL: @code{http://www.fsf.org/} @end multitable @sp 1 @c this ISBN can change! Check with SSC @c This one is correct for gawk 3.0 and edition 1.0 from the FSF -@c ISBN 1-882114-26-4 @* +ISBN 1-882114-26-4 @* @c This one is correct for gawk 3.0.3 and edition 1.0.3 from SSC -ISBN 1-57831-000-8 @* +@c ISBN 1-57831-000-8 @* Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -1387,6 +1387,12 @@ Self-contained @code{awk} scripts are useful when you want to write a program which users can invoke without their having to know that the program is written in @code{awk}. +@strong{Caution:} You should not put more than one argument on the @samp{#!} +line after the path to @code{awk}. This will not work. The operating system +treats the rest of the line as a single argument, and passes it to @code{awk}. +Doing this will lead to confusing behavior: most likely a usage diagnostic +of some sort from @code{awk}. + @cindex shell scripts @cindex scripts, shell Some older systems do not support the @samp{#!} mechanism. You can get a @@ -1441,6 +1447,21 @@ programs also, but this usually isn't very useful; the purpose of a comment is to help you or another person understand the program at a later time. 
+@strong{Caution:} As mentioned in +@ref{One-shot, ,One-shot Throw-away @code{awk} Programs}, +you can enclose small to medium programs in single quotes, in order to keep +your shell scripts self-contained. When doing so, @emph{don't} put +an apostrophe (i.e., a single quote) into a comment (or anywhere else +in your program). The shell will interpret the quote as the closing +quote for the entire program. As a result, usually the shell will +print a message about mismatched quotes, and if @code{awk} actually +runs, it will probably print strange messages about syntax errors. +For example: + +@example +awk 'BEGIN @{ print "hello" @} # let's be cute' +@end example + @node Very Simple, Two Rules, Running gawk, Getting Started @section A Very Simple Example @@ -2142,7 +2163,7 @@ listed in the table above. Another interesting question arises. Suppose you use an octal or hexadecimal escape to represent a regexp metacharacter (@pxref{Regexp Operators, , Regular Expression Operators}). -Does @code{awk} treat the character as literal character, or as a regexp +Does @code{awk} treat the character as a literal character, or as a regexp operator? @cindex dark corner @@ -4450,6 +4471,8 @@ According to POSIX, @samp{@var{expression} | getline} is ambiguous if because the concatenation operator is not parenthesized, and you should write it as @samp{("echo " "date") | getline} if you want your program to be portable to other @code{awk} implementations. +(It happens that @code{gawk} gets it right, but you should not +rely on this. Parentheses make it easier to read, anyway.) 
@node Getline/Variable/Pipe, Getline Summary, Getline/Pipe, Getline @subsection Using @code{getline} Into a Variable from a Pipe @@ -4482,6 +4505,8 @@ According to POSIX, @samp{@var{expression} | getline @var{var}} is ambiguous if because the concatenation operator is not parenthesized, and you should write it as @samp{("echo " "date") | getline @var{var}} if you want your program to be portable to other @code{awk} implementations. +(It happens that @code{gawk} gets it right, but you should not +rely on this. Parentheses make it easier to read, anyway.) @end ifinfo @node Getline Summary, , Getline/Variable/Pipe, Getline @@ -4616,7 +4641,7 @@ single @code{print} can make any number of lines this way. Here is an example of printing a string that contains embedded newlines (the @samp{\n} is an escape sequence, used to represent the newline -character; see @ref{Escape Sequences}): +character; @pxref{Escape Sequences}): @example @group @@ -6975,6 +7000,8 @@ by putting a newline after either character. However, you cannot put a newline in front of either character without using backslash continuation (@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}). +If @samp{--posix} is specified +(@pxref{Options, , Command Line Options}), then this extension is disabled. @node Function Calls, Precedence, Conditional Exp, Expressions @section Function Calls @@ -8264,7 +8291,7 @@ example: @example @group BEGIN @{ - if (("date" | getline date_now) < 0) @{ + if (("date" | getline date_now) <= 0) @{ print "Can't get system date" > "/dev/stderr" exit 1 @} @@ -8627,9 +8654,27 @@ contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains Notice that the @code{awk} program is not entered in @code{ARGV}. The other special command line options, with their arguments, are also not -entered. But variable assignments on the command line @emph{are} +entered. This includes variable assignments done with the @samp{-v} +option (@pxref{Options, ,Command Line Options}). 
+Normal variable assignments on the command line @emph{are} treated as arguments, and do show up in the @code{ARGV} array. +@example +$ cat showargs.awk +@print{} BEGIN @{ +@print{} printf "A=%d, B=%d\n", A, B +@print{} for (i = 0; i < ARGC; i++) +@print{} printf "\tARGV[%d] = %s\n", i, ARGV[i] +@print{} @} +@print{} END @{ printf "A=%d, B=%d\n", A, B @} +$ awk -v A=1 -f showargs.awk B=2 /dev/null +@print{} A=1, B=0 +@print{} ARGV[0] = awk +@print{} ARGV[1] = B=2 +@print{} ARGV[2] = /dev/null +@print{} A=1, B=2 +@end example + Your program can alter @code{ARGC} and the elements of @code{ARGV}. Each time @code{awk} reaches the end of an input file, it uses the next element of @code{ARGV} as the name of the next input file. By storing a @@ -9172,6 +9217,14 @@ clears out the target array first. This call asks it to split apart the null string. Since there is no data to split out, the function simply clears the array and then returns. +@strong{Caution:} Deleting an array does not change its type; you cannot +delete an array and then use the array's name as a scalar. For +example, this will not work: + +@example +a[1] = 3; delete a; a = 3 +@end example + @node Numeric Array Subscripts, Uninitialized Subscripts, Delete, Arrays @section Using Numbers to Subscript Arrays @@ -9785,6 +9838,10 @@ string (d.c.). The POSIX standard allows this as well. Before splitting the string, @code{split} deletes any previously existing elements in the array @var{array} (d.c.). +If @var{string} does not match @var{fieldsep} at all, @var{array} will have +one element. The value of that element will be the original +@var{string}. + @item sprintf(@var{format}, @var{expression1},@dots{}) @findex sprintf This returns (without printing) the string that @code{printf} would @@ -9901,6 +9958,9 @@ such as in the above example. However, using any other non-changeable object as the third parameter will cause a fatal error, and your program will not run. 
+Finally, if the @var{regexp} is not a regexp constant, it is converted into a +string and then the value of that string is treated as the regexp to match. + @item gsub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]}) @findex gsub This is similar to the @code{sub} function, except @code{gsub} replaces @@ -9980,6 +10040,9 @@ If the @var{how} argument is a string that does not begin with @samp{g} or @samp{G}, or if it is a number that is less than zero, only one substitution is performed. +If @var{regexp} does not match @var{target}, @code{gensub}'s return value +is the original, unchanged value of @var{target}. + @cindex differences between @code{gawk} and @code{awk} @code{gensub} is a @code{gawk} extension; it is not available in compatibility mode (@pxref{Options, ,Command Line Options}). @@ -10310,7 +10373,7 @@ and nonzero otherwise. @item system(@var{command}) @findex system @cindex interaction, @code{awk} and other programs -The system function allows the user to execute operating system commands +The @code{system} function allows the user to execute operating system commands and then return to the @code{awk} program. The @code{system} function executes the command given by the string @var{command}. It returns, as its value, the status returned by the command that was executed. @@ -10329,7 +10392,17 @@ the system administrator will be sent mail when the @code{awk} program finishes processing input and begins its end-of-input processing. Note that redirecting @code{print} or @code{printf} into a pipe is often -enough to accomplish your task. However, if your @code{awk} +enough to accomplish your task. 
If you need to run many commands, it +will be more efficient to simply print them to a pipe to the shell: + +@example +while (@var{more stuff to do}) + print @var{command} | "/bin/sh" +close("/bin/sh") +@end example + +@noindent +However, if your @code{awk} program is interactive, @code{system} is useful for cranking up large self-contained programs, such as a shell or an editor. @@ -11889,7 +11962,7 @@ Here is a second version of @code{nextfile} that remedies this problem. @c file eg/lib/nextfile.awk # nextfile --- skip remaining records in current file # correctly handle successive occurrences of the same file -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May, 1993 # this should be read in before the "main" awk program @@ -11983,7 +12056,7 @@ that is being tested. @c @group @c file eg/lib/assert.awk # assert --- assert that a condition is true. Otherwise exit. -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May, 1993 function assert(condition, string) @@ -12038,7 +12111,8 @@ mydata:1357: assertion failed: a <= 5 && b >= 17 @end example There is a problem with this version of @code{assert}, that it may not -be possible to work around. An @code{END} rule is automatically added +be possible to work around with standard @code{awk}. +An @code{END} rule is automatically added to the program calling @code{assert}. Normally, if a program consists of just a @code{BEGIN} rule, the input files and/or standard input are not read. However, now that the program has an @code{END} rule, @code{awk} @@ -12069,7 +12143,7 @@ it might be useful if your awk's @code{printf} does unbiased rounding. 
@c file eg/lib/round.awk # round --- do normal rounding # -# Arnold Robbins, arnold@@gnu.ai.mit.edu, August, 1996 +# Arnold Robbins, arnold@@gnu.org, August, 1996 # Public Domain function round(x, ival, aval, fraction) @@ -12130,7 +12204,7 @@ reason to build them into the @code{awk} interpreter. # _ord_init: function to initialize _ord_ # # Arnold Robbins -# arnold@@gnu.ai.mit.edu +# arnold@@gnu.org # Public Domain # 16 January, 1992 # 20 July, 1992, revised @@ -12254,7 +12328,7 @@ assumption since the array was likely created with @code{split} @group @c file eg/lib/join.awk # join.awk --- join an array into a string -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 function join(array, start, end, sep, result, i) @@ -12329,7 +12403,7 @@ assignment. @c file eg/lib/mktime.awk # mktime.awk --- convert a canonical date representation # into a timestamp -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 BEGIN \ @@ -12624,7 +12698,7 @@ time formatted in the same way as the @code{date} utility. @c @group @c file eg/lib/gettime.awk # gettimeofday --- get the time of day in a usable format -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain, May 1993 +# Arnold Robbins, arnold@@gnu.org, Public Domain, May 1993 # # Returns a string in the format of output of date(1) # Populates the array argument time with individual values: @@ -12732,7 +12806,7 @@ Besides solving the problem in only nine(!) lines of code, it does so # that each take the name of the file being started or # finished, respectively. # -# Arnold Robbins, arnold@@gnu.ai.mit.edu, January 1992 +# Arnold Robbins, arnold@@gnu.org, January 1992 # Public Domain FILENAME != _oldfilename \ @@ -12784,7 +12858,7 @@ This version solves the problem. # # user supplies beginfile() and endfile() functions # -# Arnold Robbins, arnold@@gnu.ai.mit.edu. 
November 1992 +# Arnold Robbins, arnold@@gnu.org, November 1992 # Public Domain FNR == 1 @{ @@ -12929,7 +13003,7 @@ The discussion walks through the code a bit at a time. @c file eg/lib/getopt.awk # getopt --- do C library getopt(3) function in awk # -# arnold@@gnu.ai.mit.edu +# arnold@@gnu.org # Public domain # # Initial version: March, 1991 @@ -13206,7 +13280,7 @@ Here is @code{pwcat}, a C program that ``cats'' the password database. * Generate a printable version of the password database * * Arnold Robbins - * arnold@@gnu.ai.mit.edu + * arnold@@gnu.org * May 1993 * Public Domain */ @@ -13289,7 +13363,7 @@ functions of the same name. @c file eg/lib/passwdawk.in @group # passwd.awk --- access password file information -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 BEGIN @{ @@ -13477,7 +13551,7 @@ Here is @code{grcat}, a C program that ``cats'' the group database. * * Generate a printable version of the group database * - * Arnold Robbins, arnold@@gnu.ai.mit.edu + * Arnold Robbins, arnold@@gnu.org * May 1993 * Public Domain */ @@ -13558,7 +13632,7 @@ There are several, modeled after the C library functions of the same names. @group @c file eg/lib/groupawk.in # group.awk --- functions for dealing with the group file -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 BEGIN \ @@ -13725,7 +13799,7 @@ user name, and returns the list of groups that have the user as a member. function getgrent() @{ _gr_init() - if (++gr_count in _gr_bycount) + if (++_gr_count in _gr_bycount) return _gr_bycount[_gr_count] return "" @} @@ -13947,7 +14021,7 @@ if invalid arguments are supplied. 
@c @group @c file eg/prog/cut.awk # cut.awk --- implement cut in awk -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # Options: @@ -14275,7 +14349,7 @@ that processes the command line arguments with @code{getopt}. The @samp{-i} @c @group @c file eg/prog/egrep.awk # egrep.awk --- simulate egrep in awk -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # Options: @@ -14558,7 +14632,7 @@ numbers. @c @group @c file eg/prog/id.awk # id.awk --- implement id in awk -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # output is: @@ -14670,7 +14744,7 @@ is used as the prefix for the output file names. @c @group @c file eg/prog/split.awk # split.awk --- do split in awk -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # usage: split [-num] [file] [outname] @@ -14798,7 +14872,7 @@ Finally, @code{awk} is forced to read the standard input by setting @group @c file eg/prog/tee.awk # tee.awk --- tee in awk -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # Revised December 1995 @end group @@ -14961,7 +15035,7 @@ standard output, @file{/dev/stdout}. @c @group @c file eg/prog/uniq.awk # uniq.awk --- do uniq in awk -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 @group @@ -15197,7 +15271,7 @@ be true if more than one file was named on the command line. @c @group @c file eg/prog/wc.awk # wc.awk --- count lines, words, characters -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # Options: @@ -15354,7 +15428,7 @@ unusual. 
@group @c file eg/prog/dupword.awk # dupword --- find duplicate words in text -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # December 1991 @{ @@ -15398,7 +15472,7 @@ to itself, in case the user is not looking at their computer or terminal.) @c @group @c file eg/prog/alarm.awk # alarm --- set an alarm -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # usage: alarm time [ "message" [ count [ delay ] ] ] @@ -15585,7 +15659,7 @@ Finally, the processing rule simply calls @code{translate} for each record. @c @group @c file eg/prog/translate.awk # translate --- do tr like stuff -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # August 1989 # bugs: does not handle things like: tr A-Z a-z, it has @@ -15703,7 +15777,7 @@ not have been an even multiple of 20 labels in the data. @c @group @c file eg/prog/labels.awk # labels.awk -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # June 1992 # Program to print labels. Each label is 5 lines of data @@ -15919,7 +15993,7 @@ The @code{END} rule simply prints out the lines, in order. @group @c file eg/prog/histsort.awk # histsort.awk --- compact a shell history file -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 # Thanks to Byron Rakitzis for the general idea @@ -16016,6 +16090,28 @@ and @samp{endfile} lines. The @code{gawk} distribution uses a copy of @file{extract.awk} to extract the sample programs and install many of them in a standard directory, where @code{gawk} can find them. +The Texinfo file looks something like this: + +@example +@dots{} +This program has a @@code@{BEGIN@} block, +which prints a nice message: + +@@example +@@c file examples/messages.awk +BEGIN @@@{ print "Don't panic!" 
@@@} +@@c end file +@@end example + +It also prints some final advice: + +@@example +@@c file examples/messages.awk +END @@@{ print "Always avoid bored archeologists!" @@@} +@@c end file +@@end example +@dots{} +@end example @file{extract.awk} begins by setting @code{IGNORECASE} to one, so that mixed upper-case and lower-case letters in the directives won't matter. @@ -16030,7 +16126,7 @@ exited with a zero exit status, signifying OK. @c file eg/prog/extract.awk # extract.awk --- extract files and run programs # from texinfo files -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # May 1993 BEGIN @{ IGNORECASE = 1 @} @@ -16080,6 +16176,9 @@ function. If the line is an ``endfile'' line, then it breaks out of the loop. If the line is an @samp{@@group} or @samp{@@end group} line, then it ignores it, and goes on to the next line. +(These Texinfo control lines keep blocks of code together on one page; +unfortunately, @TeX{} isn't always smart enough to do things exactly right, +and we have to give it some advice.) Most of the work is in the following few lines. If the line has no @samp{@@} symbols, it can be printed directly. Otherwise, each leading @samp{@@} must be @@ -16213,7 +16312,7 @@ are provided, the standard input is used. # awksed.awk --- do s/foo/bar/g using just print # Thanks to Michael Brennan for the idea -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # August 1995 @group @@ -16447,7 +16546,7 @@ Here's the program: #! 
/bin/sh # igawk --- like gawk but do @@include processing -# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# Arnold Robbins, arnold@@gnu.org, Public Domain # July 1993 if [ "$1" = debug ] @@ -18493,7 +18592,7 @@ Free Software Foundation @* Boston, MA 02111-1307 USA @* Phone: +1-617-542-5942 @* Fax (including Japan): +1-617-542-2652 @* -E-mail: @code{gnu@@prep.ai.mit.edu} @* +E-mail: @code{gnu@@gnu.org} @* @end quotation @noindent @@ -18502,7 +18601,7 @@ and to the production of more free software. @item You can get @code{gawk} by using anonymous @code{ftp} to the Internet host -@code{ftp.gnu.ai.mit.edu}, in the directory @file{/pub/gnu}. +@code{gnudist.gnu.org}, in the directory @file{/gnu/gawk}. Here is a list of alternate @code{ftp} sites from which you can obtain GNU software. When a site is listed as ``@var{site}@code{:}@var{directory}'' the @@ -19318,17 +19417,17 @@ can send mail to. @table @asis @item Internet: -@samp{bug-gnu-utils@@prep.ai.mit.edu} +@samp{bug-gnu-utils@@gnu.org} @item UUCP: -@samp{uunet!prep.ai.mit.edu!bug-gnu-utils} +@samp{uunet!gnu.org!bug-gnu-utils} @end table Please include the version number of @code{gawk} you are using. You can get this information with the command @samp{gawk --version}. You should send a carbon copy of your mail to Arnold Robbins, who can -be reached at @samp{arnold@@gnu.ai.mit.edu}. +be reached at @samp{arnold@@gnu.org}. @cindex @code{comp.lang.awk} @strong{Important!} Do @emph{not} try to report bugs in @code{gawk} by @@ -19408,13 +19507,15 @@ This section briefly describes where to get them. @item Unix @code{awk} Brian Kernighan has been able to make his implementation of @code{awk} freely available. You can get it via anonymous @code{ftp} -to the host @code{@w{netlib.att.com}}. Change directory to +to the host @code{@w{netlib.bell-labs.com}}. Change directory to @file{/netlib/research}. Use ``binary'' or ``image'' mode, and -retrieve @file{awk.bundle.Z}. +retrieve @file{awk.bundle.gz}. 
+ +This is a shell archive that has been compressed with the GNU @code{gzip} +utility. It can be uncompressed with the @code{gunzip} utility. -This is a shell archive that has been compressed with the @code{compress} -utility. It can be uncompressed with either @code{uncompress} or the -GNU @code{gunzip} utility. +You can also retrieve this version via the World Wide Web from +@uref{http://cm.bell-labs.com/who/bwk, Brian Kernighan's home page}. This version requires an ANSI C compiler; GCC (the GNU C compiler) works quite nicely. @@ -19496,7 +19597,7 @@ and any considerations you should bear in mind. You are free to add any new features you like to @code{gawk}. However, if you want your changes to be incorporated into the @code{gawk} distribution, there are several steps that you need to take in order to -make it possible for me to include to your changes. +make it possible for me to include your changes. @enumerate 1 @item @@ -19598,7 +19699,7 @@ effect, or assign the copyright in your changes to the FSF. Both of these actions are easy to do, and @emph{many} people have done so already. If you have questions, please contact me (@pxref{Bugs, , Reporting Problems and Bugs}), -or @code{gnu@@prep.ai.mit.edu}. +or @code{gnu@@gnu.org}. @item Update the documentation. @@ -19718,7 +19819,7 @@ effect, or assign the copyright in your code to the FSF. @ifinfo Both of these actions are easy to do, and @emph{many} people have done so already. If you have questions, please contact me, or -@code{gnu@@prep.ai.mit.edu}. +@code{gnu@@gnu.org}. @end ifinfo @end enumerate @@ -19855,11 +19956,6 @@ important regexp matching issues. The GNU version of @code{malloc} could potentially speed up @code{gawk}, since it relies heavily on the use of dynamic memory allocation. -@item Use of the @code{rx} regexp library -The @code{rx} regular expression library could potentially speed up -all regexp operations that require knowing the exact location of matches. 
-This includes record termination, field and array splitting, -and the @code{sub}, @code{gsub}, @code{gensub} and @code{match} functions. @end table @node Improvements, , Future Extensions, Notes @@ -20006,7 +20102,7 @@ countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1). A preprocessor for @code{pic} that reads descriptions of molecules and produces @code{pic} input for drawing them. It was written in @code{awk} by Brian Kernighan and Jon Bentley, and is available from -@code{@w{netlib@@research.att.com}}. +@email{@w{netlib@@research.bell-labs.com}}. @item Compound Statement A series of @code{awk} statements, enclosed in curly braces. Compound diff --git a/contrib/awk/eval.c b/contrib/awk/eval.c index aa2e881..b314019 100644 --- a/contrib/awk/eval.c +++ b/contrib/awk/eval.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -25,8 +25,6 @@ #include "awk.h" -#include <assert.h> - extern double pow P((double x, double y)); extern double modf P((double x, double *yp)); extern double fmod P((double x, double y)); @@ -566,14 +564,26 @@ register NODE *volatile tree; break; case Node_K_next: + if (in_begin_rule) + fatal("`next' cannot be called from a BEGIN rule"); + else if (in_end_rule) + fatal("`next' cannot be called from an END rule"); + if (in_function()) pop_fcall_stack(); + longjmp(rule_tag, TAG_CONTINUE); break; case Node_K_nextfile: + if (in_begin_rule) + fatal("`nextfile' cannot be called from a BEGIN rule"); + else if (in_end_rule) + fatal("`nextfile' cannot be called from an END rule"); + if (in_function()) pop_fcall_stack(); + do_nextfile(); break; @@ -759,7 +769,8 @@ int iscond; register size_t len; char *str; register char *dest; - int count; + int alloc_count, str_count; + int i; /* * This is an efficiency hack for multiple adjacent string @@ -773,16 +784,16 @@ int iscond; /* * But first, no arbitrary limits. Count the number of * nodes and malloc the treelist and strlist arrays. - * There will be count + 1 items to concatenate. We + * There will be alloc_count + 1 items to concatenate. We * also leave room for an extra pointer at the end to - * use as a sentinel. Thus, start count at 2. + * use as a sentinel. Thus, start alloc_count at 2. */ save_tree = tree; - for (count = 2; tree && tree->type == Node_concat; tree = tree->lnode) - count++; + for (alloc_count = 2; tree && tree->type == Node_concat; tree = tree->lnode) + alloc_count++; tree = save_tree; - emalloc(treelist, NODE **, sizeof(NODE *) * count, "tree_eval"); - emalloc(strlist, NODE **, sizeof(NODE *) * count, "tree_eval"); + emalloc(treelist, NODE **, sizeof(NODE *) * alloc_count, "tree_eval"); + emalloc(strlist, NODE **, sizeof(NODE *) * alloc_count, "tree_eval"); /* Now, here we go. 
*/ treep = treelist; @@ -795,15 +806,26 @@ int iscond; * Now, evaluate to strings in LIFO order, accumulating * the string length, so we can do a single malloc at the * end. + * + * Evaluate the expressions first, then get their + * lengths, in case one of the expressions has a + * side effect that changes one of the others. + * See test/nasty.awk. */ strp = strlist; len = 0; while (treep >= treelist) { *strp = force_string(tree_eval(*treep--)); - len += (*strp)->stlen; strp++; } *strp = NULL; + + str_count = strp - strlist; + strp = strlist; + for (i = 0; i < str_count; i++) { + len += (*strp)->stlen; + strp++; + } emalloc(str, char *, len+2, "tree_eval"); str[len] = str[len+1] = '\0'; /* for good measure */ dest = str; @@ -1397,6 +1419,11 @@ NODE *arg_list; /* Node_expression_list of calling args. */ * r_get_lhs: * This returns a POINTER to a node pointer. get_lhs(ptr) is the current * value of the var, or where to store the var's new value + * + * For the special variables, don't unref their current value if it's + * the same as the internal copy; perhaps the current one is used in + * a concatenation or some other expression somewhere higher up in the + * call chain. Ouch. 
*/ NODE ** @@ -1409,8 +1436,11 @@ Func_ptr *assign; if (assign) *assign = NULL; /* for safety */ - if (ptr->type == Node_param_list) + if (ptr->type == Node_param_list) { + if ((ptr->flags & FUNC) != 0) + fatal("can't use function name `%s' as variable or array", ptr->vname); ptr = stack_ptr[ptr->param_cnt]; + } switch (ptr->type) { case Node_var_array: @@ -1444,26 +1474,32 @@ Func_ptr *assign; break; case Node_FNR: - unref(FNR_node->var_value); - FNR_node->var_value = make_number((AWKNUM) FNR); + if (FNR_node->var_value->numbr != FNR) { + unref(FNR_node->var_value); + FNR_node->var_value = make_number((AWKNUM) FNR); + } aptr = &(FNR_node->var_value); if (assign != NULL) *assign = set_FNR; break; case Node_NR: - unref(NR_node->var_value); - NR_node->var_value = make_number((AWKNUM) NR); + if (NR_node->var_value->numbr != NR) { + unref(NR_node->var_value); + NR_node->var_value = make_number((AWKNUM) NR); + } aptr = &(NR_node->var_value); if (assign != NULL) *assign = set_NR; break; case Node_NF: - if (NF == -1) - (void) get_field(HUGE-1, assign); /* parse record */ - unref(NF_node->var_value); - NF_node->var_value = make_number((AWKNUM) NF); + if (NF == -1 || NF_node->var_value->numbr != NF) { + if (NF == -1) + (void) get_field(HUGE-1, assign); /* parse record */ + unref(NF_node->var_value); + NF_node->var_value = make_number((AWKNUM) NF); + } aptr = &(NF_node->var_value); if (assign != NULL) *assign = set_NF; diff --git a/contrib/awk/field.c b/contrib/awk/field.c index 31c9628..052c0c1 100644 --- a/contrib/awk/field.c +++ b/contrib/awk/field.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -24,7 +24,6 @@ */ #include "awk.h" -#include <assert.h> typedef void (* Setfunc) P((long, char *, long, NODE *)); @@ -63,6 +62,9 @@ int default_FS; /* TRUE when FS == " " */ Regexp *FS_regexp = NULL; static NODE *Null_field = NULL; +/* using_FIELDWIDTHS --- static function, macro to avoid overhead */ +#define using_FIELDWIDTHS() (parse_field == fw_parse_field) + /* init_fields --- set up the fields array to start with */ void @@ -140,7 +142,6 @@ rebuild_record() char *ops; register char *cops; long i; - char *f0start, *f0end; assert(NF != -1); @@ -184,15 +185,27 @@ rebuild_record() * any fields that still point into it, and have them point * into the new field zero. */ - f0start = fields_arr[0]->stptr; - f0end = fields_arr[0]->stptr + fields_arr[0]->stlen; for (cops = ops, i = 1; i <= NF; i++) { - char *field_data = fields_arr[i]->stptr; - - if (fields_arr[i]->stlen > 0 - && f0start <= field_data && field_data < f0end) - fields_arr[i]->stptr = cops; + if (fields_arr[i]->stlen > 0) { + NODE *n; + getnode(n); + + if ((fields_arr[i]->flags & FIELD) == 0) { + *n = *Null_field; + n->stlen = fields_arr[i]->stlen; + if ((fields_arr[i]->flags & (NUM|NUMBER)) != 0) { + n->flags |= (fields_arr[i]->flags & (NUM|NUMBER)); + n->numbr = fields_arr[i]->numbr; + } + } else { + *n = *(fields_arr[i]); + n->flags &= ~(MALLOC|TEMP|PERM|STRING); + } + n->stptr = cops; + unref(fields_arr[i]); + fields_arr[i] = n; + } cops += fields_arr[i]->stlen + ofslen; } @@ -751,7 +764,7 @@ NODE *tree; arr->type = Node_var_array; assoc_clear(arr); - if (sep->re_flags & FS_DFLT) { + if ((sep->re_flags & FS_DFLT) != 0 && ! 
using_FIELDWIDTHS()) { parseit = parse_field; fs = force_string(FS_node->var_value); rp = FS_regexp; @@ -851,20 +864,20 @@ set_FS() if (fields_arr != NULL) (void) get_field(HUGE - 1, 0); - if (save_fs && cmp_nodes(FS_node->var_value, save_fs) == 0 - && save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0) - return; - unref(save_fs); - save_fs = dupnode(FS_node->var_value); - unref(save_rs); - save_rs = dupnode(RS_node->var_value); - resave_fs = TRUE; - buf[0] = '\0'; - default_FS = FALSE; - if (FS_regexp) { - refree(FS_regexp); - FS_regexp = NULL; + if (! (save_fs && cmp_nodes(FS_node->var_value, save_fs) == 0 + && save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0)) { + unref(save_fs); + save_fs = dupnode(FS_node->var_value); + unref(save_rs); + save_rs = dupnode(RS_node->var_value); + resave_fs = TRUE; + if (FS_regexp) { + refree(FS_regexp); + FS_regexp = NULL; + } } + buf[0] = '\0'; + default_FS = FALSE; fs = force_string(FS_node->var_value); if (! do_traditional && fs->stlen == 0) parse_field = null_parse_field; @@ -887,7 +900,7 @@ set_FS() if (fs->stptr[0] == ' ' && fs->stlen == 1) default_FS = TRUE; else if (fs->stptr[0] != ' ' && fs->stlen == 1) { - if (! IGNORECASE) + if (! IGNORECASE || ! isalpha(fs->stptr[0])) parse_field = sc_parse_field; else if (fs->stptr[0] == '\\') /* yet another special case */ @@ -910,6 +923,5 @@ set_FS() int using_fieldwidths() { - return parse_field == fw_parse_field; + return using_FIELDWIDTHS(); } - diff --git a/contrib/awk/gawkmisc.c b/contrib/awk/gawkmisc.c index 0707971..c2be751 100644 --- a/contrib/awk/gawkmisc.c +++ b/contrib/awk/gawkmisc.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991 - 97 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
diff --git a/contrib/awk/io.c b/contrib/awk/io.c index 74d9a8d..5000aa9 100644 --- a/contrib/awk/io.c +++ b/contrib/awk/io.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1976, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -49,8 +49,6 @@ #define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR) #endif -#include <assert.h> - #if ! defined(S_ISREG) && defined(S_IFREG) #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #endif @@ -95,19 +93,8 @@ static int specfdopen P((IOBUF *iop, const char *name, const char *mode)); static int pidopen P((IOBUF *iop, const char *name, const char *mode)); static int useropen P((IOBUF *iop, const char *name, const char *mode)); -#if defined (MSDOS) && !defined (__GO32__) +#if defined (HAVE_POPEN_H) #include "popen.h" -#define popen(c, m) os_popen(c, m) -#define pclose(f) os_pclose(f) -#else -#if defined (OS2) /* OS/2, but not family mode */ -#if defined (_MSC_VER) -#define popen(c, m) _popen(c, m) -#define pclose(f) _pclose(f) -#endif -#else -extern FILE *popen(); -#endif #endif static struct redirect *red_head = NULL; @@ -125,6 +112,15 @@ extern NODE **fields_arr; static jmp_buf filebuf; /* for do_nextfile() */ +#ifdef VMS +/* File pointers have an extra level of indirection, and there are cases where + `stdin' can be null. That can crash gawk if fileno() is used as-is. */ +static int vmsrtl_fileno P((FILE *)); +static int vmsrtl_fileno(fp) FILE *fp; { return fileno(fp); } +#undef fileno +#define fileno(FP) (((FP) && *(FP)) ? 
vmsrtl_fileno(FP) : -1) +#endif /* VMS */ + /* do_nextfile --- implement gawk "nextfile" extension */ void @@ -487,9 +483,10 @@ int *errflg; /* too many files open -- close one and try again */ if (errno == EMFILE || errno == ENFILE) close_one(); -#ifdef HAVE_MMAP +#if defined __MINGW32__ || defined HAVE_MMAP /* this works for solaris 2.5, not sunos */ - else if (errno == 0) /* HACK! */ + /* it is also needed for MINGW32 */ + else if (errno == 0) /* HACK! */ close_one(); #endif else { @@ -573,24 +570,22 @@ NODE *tree; tmp = force_string(tree_eval(tree->subnode)); - /* icky special case: close(FILENAME) called. */ - if (tree->subnode == FILENAME_node - || (tmp->stlen == FILENAME_node->var_value->stlen - && STREQN(tmp->stptr, FILENAME_node->var_value->stptr, tmp->stlen))) { - (void) nextfile(TRUE); - free_temp(tmp); - return tmp_number((AWKNUM) 0.0); - } - for (rp = red_head; rp != NULL; rp = rp->next) { if (strlen(rp->value) == tmp->stlen && STREQN(rp->value, tmp->stptr, tmp->stlen)) break; } + if (rp == NULL) { /* no match */ - if (do_lint) + /* icky special case: close(FILENAME) called. 
*/ + if (tree->subnode == FILENAME_node + || (tmp->stlen == FILENAME_node->var_value->stlen + && STREQN(tmp->stptr, FILENAME_node->var_value->stptr, tmp->stlen))) { + (void) nextfile(TRUE); + } else if (do_lint) warning("close: `%.*s' is not an open file or pipe", tmp->stlen, tmp->stptr); + free_temp(tmp); return tmp_number((AWKNUM) 0.0); } @@ -888,11 +883,11 @@ const char *name, *mode; int i; if (name[6] == 'g') - sprintf(tbuf, "%d\n", getpgrp(getpgrp_arg())); + sprintf(tbuf, "%d\n", (int) getpgrp(getpgrp_arg())); else if (name[6] == 'i') - sprintf(tbuf, "%d\n", getpid()); + sprintf(tbuf, "%d\n", (int) getpid()); else - sprintf(tbuf, "%d\n", getppid()); + sprintf(tbuf, "%d\n", (int) getppid()); i = strlen(tbuf); spec_setup(iop, i, TRUE); strcpy(iop->buf, tbuf); @@ -923,7 +918,7 @@ const char *name, *mode; int ngroups; #endif - sprintf(tbuf, "%d %d %d %d", getuid(), geteuid(), getgid(), getegid()); + sprintf(tbuf, "%d %d %d %d", (int) getuid(), (int) geteuid(), (int) getgid(), (int) getegid()); cp = tbuf + strlen(tbuf); #if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 @@ -1008,7 +1003,7 @@ strictopen: if (openfd == INVALID_HANDLE) openfd = open(name, flag, 0666); if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0) - if ((buf.st_mode & S_IFMT) == S_IFDIR) + if (S_ISDIR(buf.st_mode)) fatal("file `%s' is a directory", name); return iop_alloc(openfd, name, iop); } @@ -1120,7 +1115,7 @@ struct redirect *rp; * except if popen() provides real pipes too */ -#if defined(VMS) || defined(OS2) || defined (MSDOS) +#if defined(VMS) || defined(OS2) || defined (MSDOS) || defined(WIN32) /* gawk_popen --- open an IOBUF on a child process */ @@ -1135,7 +1130,7 @@ struct redirect *rp; return NULL; rp->iop = iop_alloc(fileno(current), cmd, NULL); if (rp->iop == NULL) { - (void) fclose(current); + (void) pclose(current); current = NULL; } rp->ifp = current; @@ -1434,7 +1429,12 @@ IOBUF *iop; iop->name = name; iop->getrec = get_a_record; #ifdef HAVE_MMAP - if (S_ISREG(sbuf.st_mode) && 
sbuf.st_size > 0) { + /* Use mmap only for regular files with positive sizes. + The size must fit into size_t, so that mmap works correctly. + Also, it must fit into int, so that iop->cnt won't overflow. */ + if (S_ISREG(sbuf.st_mode) && sbuf.st_size > 0 + && sbuf.st_size == (size_t) sbuf.st_size + && sbuf.st_size == (int) sbuf.st_size) { register char *cp; iop->buf = iop->off = mmap((caddr_t) 0, sbuf.st_size, @@ -1535,7 +1535,7 @@ int *errcode; /* pointer to error variable */ return EOF; } - if (grRS == FALSE) /* special case: RS == "" */ + if (RS_is_null) /* special case: RS == "" */ rs = '\n'; else rs = (char) grRS; @@ -1648,7 +1648,7 @@ int *errcode; /* pointer to error variable */ */ if (! do_traditional && RSre != NULL) /* regexp */ rsre = RSre; - else if (grRS == FALSE) /* RS = "" */ + else if (RS_is_null) /* RS = "" */ rsre = RS_null_re; else rsre = NULL; @@ -1675,6 +1675,21 @@ int *errcode; /* pointer to error variable */ /* cases 1 and 2 are simple, just keep going */ if (research(rsre, start, 0, iop->end - start, TRUE) == -1 || RESTART(rsre, start) == REEND(rsre, start)) { + /* + * Leading newlines at the beginning of the file + * should be ignored. Whew! + */ + if (RS_is_null && *start == '\n') { + /* + * have to catch the case of a + * single newline at the front of + * the record, which the regex + * doesn't. gurr. + */ + while (*start == '\n' && start < iop->end) + start++; + goto again; + } bp = iop->end; continue; } @@ -1690,8 +1705,10 @@ int *errcode; /* pointer to error variable */ /* * Leading newlines at the beginning of the file * should be ignored. Whew! + * + * Is this code ever executed? 
*/ - if (grRS == FALSE && RESTART(rsre, start) == 0) { + if (RS_is_null && RESTART(rsre, start) == 0) { start += REEND(rsre, start); goto again; } @@ -1737,7 +1754,7 @@ int *errcode; /* pointer to error variable */ bstart = bp; } *bp = '\0'; - } else if (grRS == FALSE && iop->cnt == EOF) { + } else if (RS_is_null && iop->cnt == EOF) { /* * special case, delete trailing newlines, * should never be more than one. @@ -1811,7 +1828,7 @@ int *errcode; /* pointer to error variable */ return EOF; } - if (grRS == FALSE) /* special case: RS == "" */ + if (RS_is_null) /* special case: RS == "" */ rs = '\n'; else rs = (char) grRS; @@ -1821,7 +1838,7 @@ int *errcode; /* pointer to error variable */ rs = casetable[rs]; /* if RS = "", skip leading newlines at the front of the file */ - if (grRS == FALSE && iop->off == iop->buf) { + if (RS_is_null && iop->off == iop->buf) { for (bp = iop->off; *bp == '\n'; bp++) continue; @@ -1835,7 +1852,7 @@ int *errcode; /* pointer to error variable */ */ if (! do_traditional && RSre != NULL) /* regexp */ rsre = RSre; - else if (grRS == FALSE) /* RS = "" */ + else if (RS_is_null) /* RS = "" */ rsre = RS_null_re; else rsre = NULL; @@ -1862,7 +1879,7 @@ int *errcode; /* pointer to error variable */ iop->off = iop->end; /* all done with the record */ set_RT_to_null(); /* special case, don't allow trailing newlines */ - if (grRS == FALSE && *(iop->end - 1) == '\n') + if (RS_is_null && *(iop->end - 1) == '\n') return iop->end - start - 1; else return iop->end - start; diff --git a/contrib/awk/main.c b/contrib/awk/main.c index 92445de..4ef2eb7 100644 --- a/contrib/awk/main.c +++ b/contrib/awk/main.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -439,8 +439,8 @@ FILE *fp; fputs("\t-W traditional\t\t--traditional\n", fp); fputs("\t-W usage\t\t--usage\n", fp); fputs("\t-W version\t\t--version\n", fp); - fputs("\nReport bugs to bug-gnu-utils@prep.ai.mit.edu,\n", fp); - fputs("with a Cc: to arnold@gnu.ai.mit.edu\n", fp); + fputs("\nReport bugs to bug-gnu-utils@gnu.org,\n", fp); + fputs("with a Cc: to arnold@gnu.org\n", fp); exit(exitval); } @@ -450,7 +450,7 @@ static void copyleft() { static char blurb_part1[] = -"Copyright (C) 1989, 1991-1997 Free Software Foundation.\n\ +"Copyright (C) 1989, 1991-1999 Free Software Foundation.\n\ \n\ This program is free software; you can redistribute it and/or modify\n\ it under the terms of the GNU General Public License as published by\n\ diff --git a/contrib/awk/msg.c b/contrib/awk/msg.c index 82fa422..c5aaa06 100644 --- a/contrib/awk/msg.c +++ b/contrib/awk/msg.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. diff --git a/contrib/awk/node.c b/contrib/awk/node.c index 6f10b9f..af899b6 100644 --- a/contrib/awk/node.c +++ b/contrib/awk/node.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-1999 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -36,6 +36,7 @@ register NODE *n; char save; char *ptr; unsigned int newflags; + extern double strtod(); #ifdef DEBUG if (n == NULL) @@ -140,7 +141,16 @@ register NODE *s; /* not an integral value, or out of range */ if ((val = double_to_int(s->numbr)) != s->numbr || val < LONG_MIN || val > LONG_MAX) { -#ifdef GFMT_WORKAROUND + /* + * Once upon a time, if GFMT_WORKAROUND wasn't defined, + * we just blindly did this: + * sprintf(sp, format, s->numbr); + * s->stlen = strlen(sp); + * s->stfmt = (char) index; + * but that's no good if, e.g., OFMT is %s. So we punt, + * and just always format the value ourselves. + */ + NODE *dummy, *r; unsigned short oflags; extern NODE *format_tree P((const char *, int, NODE *)); @@ -161,15 +171,6 @@ register NODE *s; freenode(dummy); /* to keep s->stptr == r->stpr. */ goto no_malloc; -#else - /* - * no need for a "replacement" formatting by gawk, - * just use sprintf - */ - sprintf(sp, format, s->numbr); - s->stlen = strlen(sp); - s->stfmt = (char) index; -#endif /* GFMT_WORKAROUND */ } else { /* integral value */ /* force conversion to long only once */ @@ -183,11 +184,9 @@ register NODE *s; } s->stfmt = -1; } - emalloc(s->stptr, char *, s->stlen + 2, "force_string"); + emalloc(s->stptr, char *, s->stlen + 2, "format_val"); memcpy(s->stptr, sp, s->stlen+1); -#ifdef GFMT_WORKAROUND no_malloc: -#endif /* GFMT_WORKAROUND */ s->stref = 1; s->flags |= STR; return s; @@ -495,7 +494,9 @@ char **string_ptr; } i = 0; for (;;) { - if (ISXDIGIT((c = *(*string_ptr)++))) { + /* do outside test to avoid multiple side effects */ + c = *(*string_ptr)++; + if (ISXDIGIT(c)) { i *= 16; if (ISDIGIT(c)) i += c - '0'; diff --git a/contrib/awk/patchlevel.h b/contrib/awk/patchlevel.h index f360824..ccb0c85 100644 --- a/contrib/awk/patchlevel.h +++ b/contrib/awk/patchlevel.h @@ -1 +1 @@ -#define PATCHLEVEL 3 +#define PATCHLEVEL 4 diff --git a/contrib/awk/posix/ChangeLog b/contrib/awk/posix/ChangeLog index 575baa1..00c39a4 100644 --- 
a/contrib/awk/posix/ChangeLog +++ b/contrib/awk/posix/ChangeLog @@ -1,3 +1,7 @@ +Wed Jun 30 16:14:36 1999 Arnold D. Robbins <arnold@gnu.org> + + * Release 3.0.4: Release tar file made. This time for sure. + Thu May 15 12:49:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us> * Release 3.0.3: Release tar file made. |