summary refs log tree commit diff stats
path: root/contrib
diff options
context:
space:
mode:
author jraynard <jraynard@FreeBSD.org> 1997-10-14 18:17:11 +0000
committer jraynard <jraynard@FreeBSD.org> 1997-10-14 18:17:11 +0000
commit 5cffd06d9dff241b1cfa78a8039972b9368e5cc2 (patch)
tree 5c6bc638fe385e88fcc4d588090c50e902d4995c /contrib
parent 60aed874ba23ae327f34ccc7b28642c36f0c9845 (diff)
parent a46c41193ff2573a4c910e19b570e9c253e714a1 (diff)
download FreeBSD-src-5cffd06d9dff241b1cfa78a8039972b9368e5cc2.zip
FreeBSD-src-5cffd06d9dff241b1cfa78a8039972b9368e5cc2.tar.gz
This commit was generated by cvs2svn to compensate for changes in r30411,
which included commits to RCS files with non-trunk default branches.
Diffstat (limited to 'contrib')
-rw-r--r--contrib/awk/ACKNOWLEDGMENT34
-rw-r--r--contrib/awk/COPYING340
-rw-r--r--contrib/awk/ChangeLog826
-rw-r--r--contrib/awk/FREEBSD-upgrade60
-rw-r--r--contrib/awk/FUTURES108
-rw-r--r--contrib/awk/INSTALL181
-rw-r--r--contrib/awk/LIMITATIONS16
-rw-r--r--contrib/awk/NEWS1802
-rw-r--r--contrib/awk/PORTS36
-rw-r--r--contrib/awk/POSIX.STD109
-rw-r--r--contrib/awk/PROBLEMS10
-rw-r--r--contrib/awk/README94
-rw-r--r--contrib/awk/README_d/README.FIRST21
-rw-r--r--contrib/awk/acconfig.h36
-rw-r--r--contrib/awk/array.c526
-rw-r--r--contrib/awk/awk.h882
-rw-r--r--contrib/awk/awk.y2434
-rw-r--r--contrib/awk/builtin.c2048
-rw-r--r--contrib/awk/config.h207
-rw-r--r--contrib/awk/custom.h59
-rw-r--r--contrib/awk/dfa.c2606
-rw-r--r--contrib/awk/dfa.h364
-rw-r--r--contrib/awk/doc/ChangeLog91
-rw-r--r--contrib/awk/doc/awk.12621
-rw-r--r--contrib/awk/doc/gawk.texi20820
-rw-r--r--contrib/awk/eval.c1720
-rw-r--r--contrib/awk/field.c915
-rw-r--r--contrib/awk/gawkmisc.c63
-rw-r--r--contrib/awk/getopt.c1000
-rw-r--r--contrib/awk/getopt.h133
-rw-r--r--contrib/awk/getopt1.c189
-rw-r--r--contrib/awk/io.c1941
-rw-r--r--contrib/awk/main.c735
-rw-r--r--contrib/awk/missing.c59
-rw-r--r--contrib/awk/msg.c189
-rw-r--r--contrib/awk/node.c515
-rw-r--r--contrib/awk/patchlevel.h1
-rw-r--r--contrib/awk/posix/ChangeLog19
-rw-r--r--contrib/awk/posix/gawkmisc.c108
-rw-r--r--contrib/awk/random.c379
-rw-r--r--contrib/awk/random.h29
-rw-r--r--contrib/awk/re.c310
-rw-r--r--contrib/awk/regex.h531
-rw-r--r--contrib/awk/test/ChangeLog252
-rw-r--r--contrib/awk/test/Makefile451
-rw-r--r--contrib/awk/test/Makefile.in451
-rw-r--r--contrib/awk/test/README18
-rw-r--r--contrib/awk/test/anchgsub.awk1
-rw-r--r--contrib/awk/test/anchgsub.in1
-rw-r--r--contrib/awk/test/anchgsub.ok1
-rw-r--r--contrib/awk/test/argarray.awk14
-rw-r--r--contrib/awk/test/argarray.in1
-rw-r--r--contrib/awk/test/argarray.ok9
-rw-r--r--contrib/awk/test/argtest.awk4
-rw-r--r--contrib/awk/test/argtest.ok4
-rw-r--r--contrib/awk/test/arrayparm.awk21
-rw-r--r--contrib/awk/test/arrayparm.ok1
-rw-r--r--contrib/awk/test/arrayref.awk13
-rw-r--r--contrib/awk/test/arrayref.ok2
-rw-r--r--contrib/awk/test/asgext.awk1
-rw-r--r--contrib/awk/test/asgext.in3
-rw-r--r--contrib/awk/test/asgext.ok6
-rw-r--r--contrib/awk/test/awkpath.ok1
-rw-r--r--contrib/awk/test/back89.in2
-rw-r--r--contrib/awk/test/back89.ok1
-rw-r--r--contrib/awk/test/backgsub.awk4
-rw-r--r--contrib/awk/test/backgsub.in1
-rw-r--r--contrib/awk/test/backgsub.ok1
-rw-r--r--contrib/awk/test/badargs.ok23
-rw-r--r--contrib/awk/test/childin.ok1
-rw-r--r--contrib/awk/test/clobber.awk98
-rw-r--r--contrib/awk/test/clobber.ok1
-rw-r--r--contrib/awk/test/clsflnam.awk12
-rw-r--r--contrib/awk/test/clsflnam.in3
-rw-r--r--contrib/awk/test/clsflnam.ok0
-rw-r--r--contrib/awk/test/compare.awk13
-rw-r--r--contrib/awk/test/compare.in4
-rw-r--r--contrib/awk/test/compare.ok5
-rw-r--r--contrib/awk/test/convfmt.awk10
-rw-r--r--contrib/awk/test/convfmt.ok3
-rw-r--r--contrib/awk/test/defref.awk1
-rw-r--r--contrib/awk/test/defref.ok2
-rw-r--r--contrib/awk/test/delarprm.awk50
-rw-r--r--contrib/awk/test/delarprm.ok0
-rw-r--r--contrib/awk/test/dynlj.awk1
-rw-r--r--contrib/awk/test/dynlj.ok1
-rw-r--r--contrib/awk/test/eofsplit.awk68
-rw-r--r--contrib/awk/test/eofsplit.ok0
-rw-r--r--contrib/awk/test/fflush.ok16
-rwxr-xr-xcontrib/awk/test/fflush.sh16
-rw-r--r--contrib/awk/test/fieldwdth.ok1
-rw-r--r--contrib/awk/test/fldchg.awk8
-rw-r--r--contrib/awk/test/fldchg.in1
-rw-r--r--contrib/awk/test/fldchg.ok3
-rw-r--r--contrib/awk/test/fldchgnf.awk1
-rw-r--r--contrib/awk/test/fldchgnf.in1
-rw-r--r--contrib/awk/test/fldchgnf.ok2
-rw-r--r--contrib/awk/test/fnarray.awk7
-rw-r--r--contrib/awk/test/fnarray.ok1
-rw-r--r--contrib/awk/test/fnarydel.awk60
-rw-r--r--contrib/awk/test/fnarydel.ok27
-rw-r--r--contrib/awk/test/fsbs.in1
-rw-r--r--contrib/awk/test/fsbs.ok1
-rw-r--r--contrib/awk/test/fsrs.awk8
-rw-r--r--contrib/awk/test/fsrs.in7
-rw-r--r--contrib/awk/test/fsrs.ok5
-rw-r--r--contrib/awk/test/fstabplus.awk2
-rw-r--r--contrib/awk/test/fstabplus.ok1
-rw-r--r--contrib/awk/test/funstack.awk977
-rw-r--r--contrib/awk/test/funstack.in206
-rw-r--r--contrib/awk/test/funstack.ok0
-rw-r--r--contrib/awk/test/gensub.awk7
-rw-r--r--contrib/awk/test/gensub.in2
-rw-r--r--contrib/awk/test/gensub.ok4
-rw-r--r--contrib/awk/test/getline.awk1
-rw-r--r--contrib/awk/test/getline.ok2
-rw-r--r--contrib/awk/test/getlnhd.awk10
-rw-r--r--contrib/awk/test/getlnhd.ok2
-rw-r--r--contrib/awk/test/gnureops.awk45
-rw-r--r--contrib/awk/test/gnureops.ok17
-rw-r--r--contrib/awk/test/gsubasgn.awk13
-rw-r--r--contrib/awk/test/gsubasgn.ok4
-rwxr-xr-xcontrib/awk/test/gsubtest.awk8
-rw-r--r--contrib/awk/test/gsubtest.ok6
-rw-r--r--contrib/awk/test/igncfs.awk8
-rw-r--r--contrib/awk/test/igncfs.in2
-rw-r--r--contrib/awk/test/igncfs.ok2
-rw-r--r--contrib/awk/test/ignrcase.ok1
-rw-r--r--contrib/awk/test/inftest.awk5
-rw-r--r--contrib/awk/test/inftest.ok105
-rw-r--r--contrib/awk/test/intest.awk4
-rw-r--r--contrib/awk/test/intest.ok1
-rw-r--r--contrib/awk/test/intprec.awk1
-rw-r--r--contrib/awk/test/intprec.ok1
-rw-r--r--contrib/awk/test/lib/awkpath.awk1
-rw-r--r--contrib/awk/test/litoct.awk1
-rw-r--r--contrib/awk/test/litoct.ok1
-rw-r--r--contrib/awk/test/longwrds.awk20
-rw-r--r--contrib/awk/test/longwrds.ok21
-rw-r--r--contrib/awk/test/manpage200
-rw-r--r--contrib/awk/test/manyfiles.awk1
-rw-r--r--contrib/awk/test/math.awk10
-rw-r--r--contrib/awk/test/math.ok6
-rw-r--r--contrib/awk/test/messages.awk9
-rw-r--r--contrib/awk/test/mmap8k.in143
-rw-r--r--contrib/awk/test/negexp.ok1
-rw-r--r--contrib/awk/test/nfldstr.ok0
-rw-r--r--contrib/awk/test/nfset.awk1
-rw-r--r--contrib/awk/test/nfset.in5
-rw-r--r--contrib/awk/test/nfset.ok5
-rw-r--r--contrib/awk/test/nlfldsep.awk2
-rw-r--r--contrib/awk/test/nlfldsep.in5
-rw-r--r--contrib/awk/test/nlfldsep.ok13
-rw-r--r--contrib/awk/test/noeffect.awk4
-rw-r--r--contrib/awk/test/noeffect.ok2
-rw-r--r--contrib/awk/test/nofmtch.awk1
-rw-r--r--contrib/awk/test/nofmtch.ok2
-rw-r--r--contrib/awk/test/nondec.awk1
-rw-r--r--contrib/awk/test/nondec.ok1
-rw-r--r--contrib/awk/test/nonl.awk1
-rw-r--r--contrib/awk/test/nonl.ok1
-rw-r--r--contrib/awk/test/noparms.awk1
-rw-r--r--contrib/awk/test/noparms.ok4
-rw-r--r--contrib/awk/test/nors.in1
-rw-r--r--contrib/awk/test/nors.ok2
-rw-r--r--contrib/awk/test/numsubstr.awk1
-rw-r--r--contrib/awk/test/numsubstr.in3
-rw-r--r--contrib/awk/test/numsubstr.ok3
-rw-r--r--contrib/awk/test/out1.ok1
-rw-r--r--contrib/awk/test/out2.ok2
-rw-r--r--contrib/awk/test/out3.ok1
-rw-r--r--contrib/awk/test/paramdup.awk8
-rw-r--r--contrib/awk/test/paramdup.ok2
-rw-r--r--contrib/awk/test/pcntplus.awk1
-rw-r--r--contrib/awk/test/pcntplus.ok1
-rw-r--r--contrib/awk/test/pid.awk44
-rw-r--r--contrib/awk/test/pid.ok0
-rwxr-xr-xcontrib/awk/test/pid.sh5
-rw-r--r--contrib/awk/test/pipeio1.awk31
-rw-r--r--contrib/awk/test/pipeio1.ok2
-rw-r--r--contrib/awk/test/pipeio2.awk67
-rw-r--r--contrib/awk/test/pipeio2.in8
-rw-r--r--contrib/awk/test/pipeio2.ok16
-rw-r--r--contrib/awk/test/posix.awk69
-rw-r--r--contrib/awk/test/posix.ok16
-rwxr-xr-xcontrib/awk/test/poundbang3
-rw-r--r--contrib/awk/test/poundbang.ok1
-rw-r--r--contrib/awk/test/prdupval.awk1
-rw-r--r--contrib/awk/test/prdupval.in1
-rw-r--r--contrib/awk/test/prdupval.ok1
-rw-r--r--contrib/awk/test/prmarscl.awk6
-rw-r--r--contrib/awk/test/prmarscl.ok1
-rw-r--r--contrib/awk/test/prmreuse.awk14
-rw-r--r--contrib/awk/test/prmreuse.ok0
-rw-r--r--contrib/awk/test/prt1eval.awk6
-rw-r--r--contrib/awk/test/prt1eval.ok1
-rw-r--r--contrib/awk/test/prtoeval.awk4
-rw-r--r--contrib/awk/test/prtoeval.ok2
-rw-r--r--contrib/awk/test/rand.awk6
-rw-r--r--contrib/awk/test/rand.ok1
-rw-r--r--contrib/awk/test/reg/exp-eq.awk1
-rw-r--r--contrib/awk/test/reg/exp-eq.good3
-rw-r--r--contrib/awk/test/reg/exp-eq.in3
-rw-r--r--contrib/awk/test/reg/exp.awk1
-rw-r--r--contrib/awk/test/reg/exp.good2
-rw-r--r--contrib/awk/test/reg/exp.in0
-rw-r--r--contrib/awk/test/reg/func.awk1
-rw-r--r--contrib/awk/test/reg/func.good1
-rw-r--r--contrib/awk/test/reg/func.in0
-rw-r--r--contrib/awk/test/reg/func2.awk2
-rw-r--r--contrib/awk/test/reg/func2.good2
-rw-r--r--contrib/awk/test/reg/func2.in0
-rw-r--r--contrib/awk/test/reg/log.awk1
-rw-r--r--contrib/awk/test/reg/log.good4
-rw-r--r--contrib/awk/test/reg/log.in0
-rwxr-xr-xcontrib/awk/test/regtest18
-rw-r--r--contrib/awk/test/reindops.awk6
-rw-r--r--contrib/awk/test/reindops.in1
-rw-r--r--contrib/awk/test/reindops.ok1
-rw-r--r--contrib/awk/test/reint.awk1
-rw-r--r--contrib/awk/test/reint.in1
-rw-r--r--contrib/awk/test/reint.ok1
-rw-r--r--contrib/awk/test/reparse.awk7
-rw-r--r--contrib/awk/test/reparse.in1
-rw-r--r--contrib/awk/test/reparse.ok3
-rw-r--r--contrib/awk/test/resplit.ok1
-rw-r--r--contrib/awk/test/rs.in15
-rw-r--r--contrib/awk/test/rs.ok3
-rw-r--r--contrib/awk/test/rswhite.awk2
-rw-r--r--contrib/awk/test/rswhite.in2
-rw-r--r--contrib/awk/test/rswhite.ok2
-rw-r--r--contrib/awk/test/sclforin.awk1
-rw-r--r--contrib/awk/test/sclforin.ok1
-rw-r--r--contrib/awk/test/sclifin.awk7
-rw-r--r--contrib/awk/test/sclifin.ok1
-rw-r--r--contrib/awk/test/splitargv.awk7
-rw-r--r--contrib/awk/test/splitargv.in7
-rw-r--r--contrib/awk/test/splitargv.ok7
-rw-r--r--contrib/awk/test/splitvar.awk5
-rw-r--r--contrib/awk/test/splitvar.in1
-rw-r--r--contrib/awk/test/splitvar.ok1
-rw-r--r--contrib/awk/test/splitwht.awk7
-rw-r--r--contrib/awk/test/splitwht.ok2
-rw-r--r--contrib/awk/test/sprintfc.awk1
-rw-r--r--contrib/awk/test/sprintfc.in3
-rw-r--r--contrib/awk/test/sprintfc.ok3
-rw-r--r--contrib/awk/test/strftlng.awk11
-rw-r--r--contrib/awk/test/strftlng.ok58
-rw-r--r--contrib/awk/test/substr.awk14
-rw-r--r--contrib/awk/test/substr.ok10
-rw-r--r--contrib/awk/test/swaplns.awk7
-rw-r--r--contrib/awk/test/swaplns.in9
-rw-r--r--contrib/awk/test/swaplns.ok9
-rw-r--r--contrib/awk/test/tradanch.awk2
-rw-r--r--contrib/awk/test/tradanch.in2
-rw-r--r--contrib/awk/test/tradanch.ok0
-rw-r--r--contrib/awk/test/tweakfld.awk296
-rw-r--r--contrib/awk/test/tweakfld.in3
-rw-r--r--contrib/awk/test/tweakfld.ok3
-rw-r--r--contrib/awk/version.c50
260 files changed, 49739 insertions, 0 deletions
diff --git a/contrib/awk/ACKNOWLEDGMENT b/contrib/awk/ACKNOWLEDGMENT
new file mode 100644
index 0000000..0851ecf
--- /dev/null
+++ b/contrib/awk/ACKNOWLEDGMENT
@@ -0,0 +1,34 @@
+The current developers of Gawk would like to thank and acknowledge the
+many people who have contributed to the development through bug reports
+and fixes and suggestions. Unfortunately, we have not been organized
+enough to keep track of all the names -- for that we apologize.
+
+The following people were involved in porting gawk to different platforms.
+
+ Mike Lijewski <mjlx@eagle.cnsf.cornell.edu> (IBM RS6000)
+ Kent Williams (MSDOS 2.11)
+ Conrad Kwok (MSDOS earlier versions)
+ Scott Garfinkle (MSDOS earlier versions)
+ Hal Peterson <hrp@pecan.cray.com> (Cray)
+
+This group of people comprise the "GAWK crack portability team", who
+test the pre-releases and ensure portability of gawk.
+
+ Pat Rankin <gawk.rankin@EQL.Caltech.Edu> (VMS)
+ Michal Jaegermann <michal@gortel.phys.UAlberta.CA>
+ (Atari, NeXT, DEC 3100)
+ Scott Deifik <scottd@amgen.com> (MSDOS 2.14, 2.15, 3.0)
+ Kai Uwe Rommel <rommel@ars.de> (OS/2)
+ Darrel Hankerson <hankedr@mail.auburn.edu> (DOS and formerly OS/2)
+ Mark Moraes <Mark-Moraes@deshaw.com> (Code Center, Purify)
+ Kaveh Ghazi <ghazi@noc.rutgers.edu> (Lots of Unix variants)
+
+Michal, Scott and Darrel go out of their way to make sure that gawk
+works on non-32 bit systems, and keep me on track where portability is
+concerned. Indeed, all of these folks are incredibly helpful; gawk would
+not be the fine program it is now without them.
+
+Last, but far from least, we would like to thank Brian Kernighan who
+has helped to clear up many dark corners of the language and provided a
+restraining touch when we have been overly tempted by "feeping
+creaturism".
diff --git a/contrib/awk/COPYING b/contrib/awk/COPYING
new file mode 100644
index 0000000..60549be
--- /dev/null
+++ b/contrib/awk/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/contrib/awk/ChangeLog b/contrib/awk/ChangeLog
new file mode 100644
index 0000000..0fa6515
--- /dev/null
+++ b/contrib/awk/ChangeLog
@@ -0,0 +1,826 @@
+Thu May 15 12:49:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.3: Release tar file made.
+
+Wed May 14 08:06:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (do_close): add lint warning if closing something that
+ isn't open.
+
+Tue May 13 12:14:12 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * random.c, builtin.c: remove __GLIBC__ tests, since it breaks
+ `make test'. I prefer consistency across platforms.
+ * Makefile.in (gawk): undid April 25 changes and added comment.
+ Putting COMPFLAGS in breaks with -g on VMS POSIX.
+
+Sun May 11 14:48:04 1997 Darrell Hankerson <hankedr@mail.auburn.edu>
+
+ * io.c [MSC_VER]: add cases for WIN32.
+ * regex.c [MSC_VER]: add cases for WIN32.
+
+Sun May 11 07:04:01 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_print): in the loop that evaluates each expression
+ to be printed, do a dupnode to avoid bizarre output. Thanks to
+ Michal for finding this problem.
+ * awk.y (yylex): fix scanning of hexadecimal constants.
+
+Wed May 7 15:09:25 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (get_a_record): fix casetable indexing with cast to int.
+ Keeps Michal happy.
+
+Tue May 6 16:40:19 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (func_call): removed unneeded variables.
+
+Mon May 5 21:17:37 1997 Pat Rankin <rankin@eql.caltech.edu>
+
+ * missing/strftime.c [case 'v', VMS_EXT]: for VMS date format, two
+ digit day of month should not be zero padded on the 1st through
+ the 9th.
+
+Mon May 5 06:33:47 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * regex.h, regex.c: merge with current GLIBC version.
+
+Mon May 5 06:33:47 1997 Pat Rankin <rankin@eql.caltech.edu>
+
+ * io.c (nextfile): move the check for null return from iop_open
+ in the normal case and add one for the "no args" case.
+
+Fri Apr 25 16:52:33 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * array.c (grow_table): add a bunch more large primes so arrays
+ can get really big. Thanks to christos@deshaw.com.
+ * all files: remove ifdef'ed out code and update copyrights.
+ * Makefile.in (gawk): add $(COMPFLAGS) to command line.
+ * eval.c (flags2str): added case for FIELD.
+
+Thu Apr 24 22:39:23 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * COPYING: changed to current official version from FSF.
+ * regex.c: merge with GLIBC version.
+ * awk.h [_GNU_SOURCE]: bracket definition inside ifdef.
+ (NODE.source_line): move name member out of `x' union and
+ into `nodep'; avoids problems doing diagnostics.
+ (nondec2num): put decl into #if BITOPS || NONDECDATA
+ * posix/gawkmisc.c, missing/system.c, missing/strtod.c,
+ missing/strerror.c: move to generic GPL statement at top.
+ * builtin.c (nondec2num): put into #if BITOPS || NONDECDATA
+
+Wed Apr 23 22:14:14 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * dfa.c: misc changes for really pedantic SGI compilers.
+ * builtin.c: bracket defs of random() etc for GLIBC.
+ * random.c: bracket whole file for GLIBC.
+ * configure.in: extra goop for GETPGRP test for VMS POSIX.
+ * custom.h [VMS]: remove hard definition of GETPGRP_VOID.
+
+Fri Apr 18 07:55:47 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * BETA Release 3.0.34: Release tar file made.
+
+Tue Apr 15 21:35:45 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ NEW UNDOCUMENTED FEATURE. USE THE SOURCE LUKE!
+ * acconfig.h [NONDECDATA]: new macro.
+ * awk.h: add decl of do_strtonum.
+ * awk.y (tokentab): add entry for strtonum function.
+ * builtin.c (do_strtonum): new function.
+ * configure.in (non-decimal-data): new --enable-* option.
+ * node.c (r_force_number): change to allow non-decimal data inside
+ ifdef NONDECDATA.
+
+Tue Apr 15 06:32:50 1997 Pat Rankin <rankin@eql.caltech.edu>
+
+ * missing/strftime.c (malloc, realloc, getenv, strchr): only
+ declare these when STDC_HEADERS is not defined.
+ <stdlib.h, string.h>: include these when STDC_HEADERS is defined.
+ * awk.h (freenode, tree_eval, m_tree_eval): reorganize definitions.
+ * alloca.c (malloc): if malloc is already defined as a macro,
+ presumably by config.h, don't define or declare it.
+
+Wed Apr 9 22:45:27 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in [COMPFLAGS]: per suggestion from Karl Berry, put
+ $(CFLAGS) last.
+
+Tue Apr 8 23:54:46 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (interpret): For Node_K_break and Node_K_continue, if
+ treating them like `next', also check the function call stack
+ and pop it if necessary.
+
+Mon Apr 7 18:22:37 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h: Add decls of new routines do_compl() and set_loc().
+ * awk.y (tokentab): add entry for "compl" function.
+ * builtin.c (do_compl): new function to do ones complement.
+ (do_substr): rationalized yet again, now notices negative start
+ and length parameters.
+ * eval.c (push_args): fix if call_list gets realloc'ed in the
+ middle of things. Avoids crash for deeply nested function calls.
+ * main.c (catch_sig): add call to set_loc().
+ * msg.c (set_loc, srcfile, srcline): new function and private
+ variables to help out in tracing down source of error messages.
+
+Fri Mar 28 08:42:27 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (iop_alloc, iop_close): Undo changes of Feb 11, apparently
+ other cleanups in io.c made mmap stuff start working again.
+ BAH! It's a mess, the test suite still fails. I'm leaving the
+ mmap stuff undefined for now. It'll probably get ripped out in 3.1.
+
+Thu Mar 27 08:48:57 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * custom.h [_SEQUENT_]: undef HAVE_MMAP.
+
+Wed Mar 26 09:08:16 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (iop_alloc): fix definition to make it static.
+
+Mon Mar 24 23:09:07 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * field.c (init_fields, etc..): more clean up use of Null_field
+ and the various flags.
+ * node.c (unref): if a field, free the node itself. Fixes
+ memory leak problems.
+
+Sun Mar 23 22:51:09 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h [FIELD]: new flag for node->flags field.
+ * builtin.c (sub_common): if FIELD is set, dup the string.
+ * field.c (init_fields): set up a new Null_field global var.
+ (init_fields, set_field, set_record) use the FIELD flag.
+ (getfield): use Null_field instead of private variable.
+ * io.c (wait_any): comment out calls to pclose and iop_close,
+ caused weird race conditions. See test/pipeio1.awk. Thanks
+ to Darrell Hankerson for tracing this one down.
+
+Tue Mar 18 20:57:18 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * dfa.c (inboth): free templist; plugs memory leak.
+ * field.c (init_fields, grow_fields_arr, set_field, rebuild_record,
+ set_record): remove PERM flag from entries in fields_arr[]. Fixes
+ nasty memory leak.
+
+Tue Mar 18 06:33:00 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.y (dup_parms): robustified against parameter errors.
+
+Sun Mar 16 21:31:40 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ NEW UNDOCUMENTED FEATURE. USE THE SOURCE LUKE!
+ * acconfig.h [BITOPS]: new macro. If set, do octal & hex and bit ops.
+ * awk.h [isnondecimal]: new macro, and decl of new functions.
+ * awk.y (yylex): add recognition of octal and hex constants.
+ * builtin.c (do_and, do_or, do_xor, do_lshift, do_rshift): new
+ functions that do bit operations.
+ (nondec2awknum): new function to convert octal or hex to double.
+ * configure.in: Add AC_ARG_ENABLE for bit operations.
+ * node.c (r_force_number): add octal and hex conversion.
+
+Sun Mar 16 21:28:56 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h [IOP_NOFREE_OBJ]: new macro.
+ * io.c (iop_open, iop_alloc): add new third parameter, which is
+ either NULL, meaning allocate a new IOP, or the address of one
+ already allocated. Have a static one in the `nextfile'
+ routine, and use the IOP_NOFREE_OBJ flag for it. All of this
+ keeps us from reading freed memory. The `swaplns' test fails
+ otherwise.
+ (iop_close): if IOP_NOFREE_OBJ is set, don't free the IOBUF.
+
+Wed Feb 26 06:21:02 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (in_function, pop_fcall_stack, pop_fcall, push_args):
+ new functions. These manage "frames" of awk function call arguments.
+ The problem is that a `next' or a `nextfile' from a function
+ leaks memory. These changes allow us to free up that memory.
+ (interpret): for Node_K_next and Node_K_nextfile, check if in
+ a function call and free all function call frames.
+
+Fri Feb 21 06:23:19 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Misc changes from Katsuyuki Okabe <HGC02147@niftyserve.or.jp>:
+ * builtin.c (do_substr): change a %d to %ld in warning message.
+ * eval.c (op_assign): fix format string for warning about %=.
+
+Wed Feb 19 23:29:02 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * main.c (main): add do_intervals to condition that causes
+ resetup() to be called again. Makes the --re-interval option
+ actually work. What a concept.
+
+Fri Feb 14 09:47:31 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c [#include "awk.h"]: undef HAVE_MMAP to just use the old code.
+ Something is causing a file descriptor leak, and this is getting to
+ be just too much hair. I reserve the right to rip out the mmap
+ code entirely at a future date.
+
+Tue Feb 11 06:28:29 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (iop_alloc): for an mmap'ed file, close the file descriptor,
+ and then touch each page to get a private copy. Fixes nasty case
+ of truncating our input file.
+ (iop_close): don't call close on mmap'ed file.
+
+Wed Feb 5 17:59:04 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (interpret): For Node_K_delete, just call do_delete; let
+ it handle the case of `delete array'.
+ * array.c (do_delete): Changed to handle case of `delete array',
+ and made smarter if the array is actually an uninitialized
+ parameter.
+
+Sun Jan 26 22:58:29 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * getopt.h, getopt.c, getopt1.c: replaced with new versions from
+ GLIBC 2.
+
+Sun Jan 19 23:37:03 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (nodetype2str): not static, for debugging.
+ (flags2str) new function: for debugging.
+ * field.c (get_field): add new var that is like Nnull_string but
+ does not have numeric attributes, so that new fields are strings.
+ (set_record): turn off PERM flag before unrefing fields and field 0.
+ * array.c (in_array): always evaluate subscript, could have
+ side effects.
+ * builtin.c (do_strftime): way increase size of buffer to make sure
+ we don't have overflow problem. Keeps Paul Eggert happy.
+ * custom.h [__amigaos__]: define fork to vfork. From Fred Fish.
+ * dfa.c: move include of config.h to top, for RSXNT. From Kai
+ Uwe Rommel.
+ (ISALPHA, etc): change from Jacob Engelbrecht (jaen@novo.dk)
+ to better handle non-ascii environments.
+ * gawkmisc.c: remove amigados case, posix should now work fine.
+ * amiga/*: nuked per previous entry.
+ * Makefile.in: removed all references to amiga
+ * io.c [HAVE_SYS_PARAM_H]: Add #undef RE_DUP_MAX to avoid
+ spurious conflict with regex.h.
+ (flush_io): remove amiga ifdefs, not needed anymore.
+ (spec_setup): set getrec field for special files. Fix from
+ Mark Gray (markgray@pdt.net).
+ * node.c (more_nodes): fix to get the last entry in the array.
+
+Wed Jan 8 17:42:37 1997 Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
+
+ * io.c (mmap_get_record): Fix return value if file ends without
+ record separator.
+
+Fri Jan 3 19:57:16 1997 Pat Rankin <rankin@eql.caltech.edu>
+
+ * awk.y (get_src_buf): Test for an empty source file by detecting
+ an initial read of 0 bytes rather than by relying on info from
+ stat().
+
+Wed Dec 25 11:25:22 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.2: Release tar file made.
+
+Wed Dec 25 11:17:32 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (install, uninstall): use $(srcdir)/patchlevel.h.
+ Thanks to Richard Levitte, LeViMS@stacken.kth.se.
+ (install): remove chmod command; let $(INSTALL_PROGRAM) use -m.
+
+Mon Dec 23 20:36:59 1996 Pat Rankin <rankin@eql.caltech.edu>
+
+ * custom.h (#if VMS_POSIX): Define GETPGRP_VOID.
+
+Fri Dec 20 08:59:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * getopt.c, getopt1.c: comment out the `#if defined (_LIBC) ||
+ !defined (__GNU_LIBRARY__)' and `#endif' to force use of this
+ getopt, even on systems like linux. This will be handled
+ better in 3.1 / glibc 2.
+
+Thu Dec 19 22:52:39 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.y (yylex): In several places, after yyerror(), add call to
+ exit(). Otherwise, infinite messages. This should probably
+ be handled better.
+
+Wed Dec 18 22:42:10 1996 Darrel Hankerson <hankedr@mail.auburn.edu>
+
+ * getopt.c (_getopt_internal): if 'W' and ';', if optind == argc,
+ return c, don't fall through.
+
+Wed Dec 18 10:09:44 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * configure.in [AC_PREREQ]: Update to 2.12 in order to switch to
+ autoconf 2.12. Lots of other files will be rebuilt automatically.
+ [AM_SANITY_CHECK_CC]: Removed, autoconf does it now.
+ * aclocal.m4 [AM_SANITY_CHECK_CC]: Removed, autoconf does it now.
+
+Tue Dec 17 22:23:16 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_strftime): fix case if format string is "".
+ Also fix it if format is not "" but result of strftime is "".
+ See comments in code.
+
+Tue Dec 10 23:09:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.1: Release tar file made.
+
+Tue Dec 10 22:39:41 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (dist): add dependency on `info'. Remove line that
+ does makeinfo.
+ (install): use $(LN) not $(LN_S) to link gawk gawk-version.
+
+Sun Dec 8 07:53:44 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (gawk): took COMPFLAGS out of link line for help
+ on VMS posix. Shouldn't (I hope) affect anything else.
+
+Thu Nov 28 11:52:24 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * configure.in (AC_PROG_INSTALL): Set INSTALL to install-sh.
+
+Tue Nov 26 22:42:00 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * PORTS: Updated list of systems.
+ * Makefile.in (install): Fix some typos and add some improvements
+ for Ultrix.
+
+Sun Nov 24 22:16:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_printf): if no args, fatal error. Return silently
+ if --traditional.
+
+Thu Nov 7 20:54:43 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (inrec): make sure EOF hasn't already happened before
+ trying to read; prevents accessing freed buffer. Thanks to
+ Michal Jaegermann.
+ * Makefile.in [AWKSRC]: add random.h.
+ random.h: new file, redefines names of the `random' functions.
+ random.c, builtin.c: add include of random.h.
+
+Thu Nov 7 09:06:21 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.y (snode): undo 4 Oct change, put do_split code back.
+ field.c (do_split): restore old code; add test for CONST, so
+ that re_parse_field is used if third arg to split is a regexp
+ constant.
+
+Mon Nov 4 12:57:11 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * main.c (main): Research -m[fr] options don't need literal '='
+ characters. Brian's documentation was confusing. Fixed, not
+ that anyone actually uses these options with gawk.
+
+Sun Nov 3 11:23:21 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * field.c (def_parse_field): add \n to list of acceptable white space.
+ (posix_def_parse_field): new routine, just like def_parse_field(),
+ but only allows space and tab as separators.
+ (do_split, set_FS): make appropriate choice between the two
+ *def_parse_field() routines.
+
+Fri Oct 25 10:13:06 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * configure.in: remove test for random.
+ * Makefile.in: add random.c to list of files always compiled.
+ * missing.c: remove HAVE_RANDOM test.
+ * builtin.c: remove ifdef's for HAVE_RANDOM.
+ [GAWK_RAND_MAX]: use constant we know works with our random().
+ * random.c: new file - moved from missing/ directory.
+
+Wed Oct 23 19:46:01 1996 Pat Rankin <rankin@eql.caltech.edu>
+
+ * builtin.c (do_tolower, do_toupper): Add `unsigned char *' casts.
+
+Tue Oct 22 21:27:52 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c [GAWK_RANDOM_MAX]: Try to make definition a bit
+ smarter; don't use RAND_MAX if it's equal to SHRT_MAX, blows
+ things up.
+
+Tue Oct 22 08:49:20 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * main.c (copyleft): update copyright date to 1996.
+ too many files to list: update copyright date to 1996.
+
+Sun Oct 20 12:21:09 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.y, dfa.c, eval.c, io.c, re.c: added various FIXME comments.
+
+Sat Oct 19 22:06:42 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (nodetype2str): make static, add prototype.
+ * field.c (sc_parse_field): cast array subscripts to int to
+ shut up gcc warnings.
+ * gawkmisc.c: add prototype for xmalloc.
+ * awk.h: add prototype for getredirect.
+ * builtin.c (do_fflush): remove extern decl of getredirect.
+ * io.c (get_a_record, mmap_get_record): change decl of rs to int,
+ to shut up gcc warnings.
+ * awk.y (isassignable): add a default to switch to quiet gcc.
+ * getopt.c (_getopt_internal): give default value to `indfound'.
+
+Fri Oct 18 09:00:49 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * regex.h [RE_SYNTAX_AWK]: add RE_CONTEXT_INDEP_ANCHORS.
+
+Thu Oct 17 22:32:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * aclocal.m4 [AM_SANITY_CHECK_CC]: added.
+ * configure.in: use it.
+
+Thu Oct 17 21:43:25 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * configure.in: add checks for locale.h and setlocale().
+ awk.h: include locale.h and define out setlocale() if not available.
+ main.c (main): call setlocale().
+ builtin.c (do_tolower, do_toupper): use unsigned char pointers,
+ to get other charsets right in different locales.
+
+Wed Oct 16 21:32:53 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (format_tree): Change initial buffer size to 512
+ and use a constant. Allows large values of %f per bug report
+ from sheyn@cs.bu.edu.
+
+Wed Oct 16 21:22:08 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in [MISC]: removed TAGS and tags
+ (local-distclean): added TAGS and tags
+ (maintainer-clean): removed TAGS and tags
+
+Wed Oct 16 12:28:43 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * main.c (version): Add call to copyleft(), per new standards.
+ version.c: Fix text of version string to match new standards.
+
+Sun Oct 6 22:19:45 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * regex.c: updated to Emacs 19.34b base.
+
+Sun Oct 6 21:57:34 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * re.c (make_regexp): fixed to handle \8 and \9 in the middle
+ of a regexp.
+
+Fri Oct 4 10:26:16 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.y (snode): remove case for do_split; always making the
+ third arg a Node_regex is wrong.
+ field.c (do_split): rationalized to distinguish `/ /' from `" "'.
+ Generally fixed up.
+ * node.c (parse_escape): Allow single digit \x escapes.
+
+1996-10-02 Paul Eggert <eggert@twinsun.com>
+
+ * builtin.c (format_tree):
+ Fix bug in %d and %i format: NaNs, and values
+ in the range LONG_MAX+1 .. ULONG_MAX, were mishandled.
+ Don't assume that double values <= -1 are converted to unsigned
+ long in the expected way; the C Standard doesn't guarantee this.
+
+1996-10-02 Paul Eggert <eggert@twinsun.com>
+
+ * awk.h (INT_MAX): Remove unused symbol.
+
+Mon Sep 30 22:19:11 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * getopt.c (_getopt_internal): If 'W' is in the optstring followed
+ by a ';' then search through the long opts table. This makes
+ `-W foo=bar' same as `--foo=bar'.
+ * main.c (main): 'W' now prints an error message.
+ (gawk_option): deleted the routine.
+
+Sun Sep 29 23:04:54 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (sub_common): fix several bugs with gsub when
+ matching null strings. See test/gsubtest.awk.
+
+Fri Sep 20 17:35:54 1996 Pat Rankin <rankin@eql.caltech.edu>
+
+ * alloca.c (NULL): don't define if <config.h> has already done so.
+
+Fri Sep 20 11:54:31 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_print): evaluate all the expressions first and
+ then print them. Avoids surprising behavior. See test/prtoeval.awk
+ for an example.
+
+Tue Sep 10 06:21:40 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h [FUNC]: new flag, marks a Node_parameter_list as really
+ being the function name; allows more checking in awk.y.
+ * awk.y (isassignable): now takes a NODE * instead of a type, to
+ check if a function parameter is marked FUNC, then it's the function
+ name, which is not assignable. Fix call from snode().
+ (function_prologue): mark function name as FUNC.
+ (yyerror): don't call exit() anymore; gawk will now report
+ all syntax errors.
+
+Sun Sep 1 19:36:30 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * field.c (rebuild_record): after building new field 0, go through
+ all old fields, and if they used to point into the old one,
+ have them point into the new one. Then turn off PERM flag before
+ unref-ing field 0.
+
+Wed Aug 28 19:13:34 1996 Arnold D. Robbins <arnold@math.utah.edu>
+
+ * eval.c (set_IGNORECASE): Correctly parenthesize bit operations
+ in test and fix logic for string value.
+
+Wed Aug 28 22:06:33 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * main.c (usage): add email addresses for bug reporting, per
+ change in GNU Coding Standards from RMS.
+
+Sun Aug 11 23:13:22 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (install): correct use of $(INSTALL_PROGRAM).
+
+Thu Aug 8 23:29:43 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * parse.y (isassignable): new function, checks if type can
+ be assigned to.
+ (snode): changed checking for 3rd arg of gsub to be more
+ general, supersedes earlier change.
+
+Thu Aug 8 13:58:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * parse.y (snode): If third arg to sub or gsub is builtin
+ function, complain, since can't substitute into result.
+ * eval.c (r_get_lhs): diagnose Node_builtin as an error, instead
+ of falling through into default case and using cant_happen().
+
+Thu Aug 1 07:13:14 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * regex.h [RE_DEBUG]: new macro.
+ [RE_SYNTAX_GNU_AWK]: add RE_DEBUG.
+ [RE_SYNTAX_POSIX_AWK]: add RE_INTERVALS.
+ * regex.c (re_set_syntax): add #ifdef DEBUG code to turn on `debug'
+ flag if RE_DEBUG set, and turn off debug if not set and debug
+ was on.
+ * main.c (main): remove `do_intervals = TRUE' from `if (do_posix)',
+ it's now handled in the definition of RE_SYNTAX_POSIX_AWK.
+
+Mon Jul 29 17:49:07 1996 Pat Rankin <rankin@eql.caltech.edu>
+
+ * io.c (O_ACCMODE): define it if <fcntl.h> doesn't.
+
+Mon Jul 29 12:02:48 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (set_IGNORECASE): made somewhat smarter. gawk -v IGNORECASE=0
+ was acting the same as -v IGNORECASE=1. Thanks to Darrell Hankerson
+ for the bug report.
+
+Fri Jul 26 12:04:43 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h (format_val): add declaration of new routine.
+ * node.c (format_val): new routine, abstracts old guts of
+ r_forcestring; accepts format string and index as additional params.
+ (r_force_string): changed to call format_val.
+ * builtin.c (do_print): don't tree_eval the tree twice in case
+ OFMTidx != CONVFMTidx; doing so could cause side effects
+ (from bug report by Tobias Rettstadt, xassp@ipds.uni-kiel.de).
+ Instead, call format_val.
+
+Mon Jul 22 21:59:15 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (iop_close): change check for "is $0 in the input buffer"
+ to use `< (iop->buf + iop->secsiz + iop->size)' instead of
+ `< iop->end'. The latter is bogus if EOF has been hit on the
+ file. Fix from Darrel Hankerson based on bug report by
+ Charles Howes (howes@grid.direct.ca). See test/eofsplit.awk.
+
+Thu Jul 18 19:43:20 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (sub_common): backed out change of Feb 14 in favor of:
+ (do_gensub): Changed to use make_string and then to |= TEMP
+ flag, based on bug report and patch from Katsuyuki Okabe,
+ hgc02147@niftyserve.or.jp.
+
+Thu Jul 18 19:23:53 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * custom.h: added ifdef for QNX, based on bug report from
+ Michael Hunter, mphunter@qnx.com.
+
+Mon Jul 15 09:31:01 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (redirect): When finding the rp pointer, if it's not
+ NULL, set str = rp->value. This gets the '\0' terminated
+ version. Motivated by bug report from John Hawkinson
+ (jhawk@bbnplanet.com).
+
+Sun Jul 14 18:40:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * configure.in: added call to AC_CHECK_LIB(m, fmod), since
+ apparently some systems have fmod in the math library.
+ Portability: the Holy Grail. Sigh.
+
+Sun Jul 14 18:08:01 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h: add Jim Meyering's ISASCII etc hacks for ctype macros.
+ * builtin.c (do_toupper, do_tolower, sub_common): changed to use
+ upper-case versions of ctype macros.
+ * main.c (main): ditto.
+ * node.c (r_force_number, parse_escape): ditto.
+
+Sun Jul 14 06:34:18 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * field.c (set_record): made it always do the PERM flag.
+ Fixes cases where $0 is assigned to, e.g. by gsub, keeps
+ the fields valid.
+ (get_field): removed the call to reset_record in
+ case where ! field0_valid. We want to leave the fields alone
+ if they've been changed.
+
+Thu Jul 11 23:04:20 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (devopen): change tests of (flag & O_fooONLY) to
+ (flag & O_ACCMODE) == O_fooONLY. Per (long standing) bug
+ report from Chapman Flack.
+ (close_redir): change final conditional to just (status != 0)
+ so that ERRNO always set; the warning had its own `if (do_lint)'
+ anyway.
+ * eval.c (do_split): force type of array to be Node_var_array
+ instead of Node_var. Per (long standing) bug report from
+ Chapman Flack.
+
+Thu Jul 11 22:17:14 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (install): added symlink of gawk to awk if
+ no awk in $(bindir).
+ (LN_S): new variable for symlinking.
+ (uninstall): remove awk if it's the same gawk.
+ * Configure.in: Added call to AC_PROG_LN_S for Makefile.in.
+
+Sun Jul 7 15:47:13 1996 Arnold D. Robbins <arnold@infographix.com>
+
+ * main.c (main): made `--posix' turn on interval expressions.
+ Gawk now matches its documentation. (What a concept!)
+
+Wed Jul 3 15:02:48 1996 Arnold D. Robbins <arnold@infographix.com>
+
+ * regex.h, regex.c: upgraded to changes from Emacs 19.31.
+
+Fri May 17 08:46:07 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (get_a_record): added `continued' flag. Fix from
+ Darrell Hankerson for when RS = "\n|something".
+
+Wed May 15 02:34:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (awklib/all): now depends on gawk, fixes problem
+ with parallel make.
+
+Tue May 14 15:02:52 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (format_tree): fix handling of '*' to deal with
+ negative value for fieldwidth -- make positive and turn on
+ left justify. Per bug report from Michael Brennan.
+
+Sun May 12 20:42:06 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * eval.c (r_get_lhs): case Node_subscript. Check if array name
+ is actually a function, fatal error if so.
+
+Sun May 5 10:11:52 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (redirect): call flush_io() before creating a new output pipe,
+ per bug report from Brian Kernighan (bwk@research.bell-labs.com).
+
+Fri Mar 15 06:38:33 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (install): use $(INSTALL_PROGRAM), not $(INSTALL).
+ (local-distclean): add `*~' to list of files to be removed.
+ (CFLAGS): now contains just @CFLAGS@.
+ (COMPFLAGS): replaces use of CFLAGS, has CFLAGS plus all the
+ other stuff.
+
+Wed Mar 13 14:19:38 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (mmap_get_record): fixed to not place sentinel at end
+ of mmap'ed object. Won't work if file is exact multiple of
+ disk block size. See comments in code for more info.
+ Thanks to Rick Adams (rick@uunet.uu.net) for help in testing.
+
+Sun Mar 10 22:50:23 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * io.c (do_close): notice if we were called as `close(FILENAME)'
+ and arrange to close the current input file. This turns out
+ to be easy to do, just call `nextfile(TRUE)'. Based on bug report
+ from Pascal A. Dupuis, <dupuis@lei.ucl.ac.be>.
+
+Thu Mar 7 08:08:51 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * field.c (init_fields, grow_fields, set_field, rebuild_record):
+ Nuke the `nodes' array everywhere. Anytime a field is unref'ed,
+ allocate a new node that is a copy of Nnull_string. This avoids
+ subtle memory management problems when doing a lot of assignment
+ to fields, and tweaking of NF. Make sure that fields_arr[0] always
+ has a type of Node_val!
+ * field.c (set_NF): If NF is decremented, clear fields between
+ NF and parse_high_water, otherwise if NF incremented, clear
+ fields between parse_high_water and NF.
+ * eval.c (nodetype2str): new function, used for diagnostics.
+ eval.c (interpret): use nodetype2str when finding invalid node.
+
+Mon Mar 4 09:02:28 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_toupper, do_tolower): use isascii along with
+ isupper/islower before changing case, in case characters have
+ the high bit set. This is a hack.
+
+Mon Feb 26 22:24:44 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (sub_common): if no match, and called from gensub,
+ don't free the temporary string, since the tmp_number then
+ writes over it.
+
+Sun Feb 25 23:13:01 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (format_tree): fixed %c to treat user input as
+ numeric also by adding test for MAYBE_NUM.
+
+Tue Feb 20 12:25:50 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * configure.in: Added AC_FUNC_MMAP call and add madvise to
+ list of functions to look for.
+ * awk.h [IOP_ISMAPPED]: new flag value for mmap support and new
+ `getrec' structure member in struct iobuf.
+ * io.c (iop_alloc, iop_close): changed to map/unmap input file
+ into memory if possible.
+ (mmap_get_record): new function to actually retrieve the
+ record from mmaped file.
+
+Thu Feb 1 08:56:46 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_substr): fixed lint message to use indx+1 when
+ start position is past end of string.
+
+Sun Jan 28 07:00:56 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_substr): rationalized handling of missing length
+ argument, as well as various accompanying lint warnings. Previous
+ code was slightly bogus. Talk about your Day 1 bugs.
+
+Thu Jan 25 14:09:11 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * builtin.c (do_substr): if length exceeds length of actual
+ string, do computation of needed substring length *after*
+ the lint warning.
+
+Wed Jan 24 10:06:16 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (gawk): Add $(CFLAGS) to link line.
+ (Makefile): target depends on the Makefile.in files.
+ (OTHERS): Added TAGS and tags to the distribution.
+ (local-distclean): New rule.
+ (distclean): Use it.
+ (maintainer-clean): Don't `make distclean' before running submakes,
+ since that removes makefiles needed for the submakes.
+ * builtin.c (do_strftime): Remove hard coded limit on length of result.
+ Based on code from Paul Eggert (eggert@twinsun.com).
+
+Mon Jan 22 13:16:37 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * main.c (usage): takes new fp parameter which is either
+ stdout for `--help' (per the GNU Coding Standards) or stderr
+ if an error occurs. Fix all calls.
+ (version): prints to stdout per the coding stds.
+ (copyleft): prints to stdout now, not stderr, and exits.
+
+Fri Jan 19 08:10:29 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * regex.h [RE_GNU_AWK]: added RE_CONTEXT_INDEP_OPS to set of
+ bits we turn off for regular operation. Breaks things like
+ /^+[0-9]+/ to match a literal `+' at the beginning of, say,
+ a phone number.
+
+Wed Jan 10 23:19:36 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * 3.0.0 polished up and release tar file made.
+
+Wed Dec 27 11:46:16 1995 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * 2.94.0 released to porting group (no, I haven't been good
+ about this file; I'll do better once 3.0 is released).
+
+Mon Aug 28 23:04:30 1995 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h updated for NeXT - bracket TRUE/FALSE
+ * io.c (get_a_record): removed shadowing of 'start' in
+ * Makefile.in and doc/Makefile.in: fixed to use gawk.1 and gawk.texi,
+ instead of gawk.1.in and gawk.texi.in.
+
+Mon Aug 25 11:04:30 1995 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * 2.90.0 released to porting group.
+
+Fri Aug 18 12:43:31 1995 Arnold D. Robbins <arnold@puny.ssc.com>
+
+ * ChangeLog created.
diff --git a/contrib/awk/FREEBSD-upgrade b/contrib/awk/FREEBSD-upgrade
new file mode 100644
index 0000000..d2add40f
--- /dev/null
+++ b/contrib/awk/FREEBSD-upgrade
@@ -0,0 +1,60 @@
+
+Import of GNU awk 3.0.3
+
+Original source available as ftp://prep.ai.mit.edu/pub/gnu/gawk-3.0.3.tar.gz
+
+The following files and directories were removed for this import:
+
+Makefile.in
+README_d/README.VMS
+README_d/README.atari
+README_d/README.irix
+README_d/README.linux
+README_d/README.pc
+README_d/README.sco
+README_d/README.sgi
+README_d/README.solaris
+README_d/README.sony
+README_d/README.sunos4
+README_d/README.ultrix
+README_d/README.yacc
+aclocal.m4
+alloca.c
+atari/
+awklib/
+awktab.c
+configh.in
+configure
+configure.in
+doc/Makefile.in
+doc/README.card
+doc/ad.block
+doc/awkcard.in
+doc/awkforai.txt
+doc/cardfonts
+doc/colors
+doc/igawk.1
+doc/macros
+doc/no.colors
+doc/setter.outline
+doc/texinfo.tex
+install-sh
+missing/
+mkinstalldirs
+pc/
+protos.h
+regex.c
+stamp-h.in
+vms/
+
+In addition, doc/gawk.1 and doc/gawk.texi were renamed to awk.1 and awk.texi.
+
+The test sub-directory has been left in, as, although not necessary to build
+awk on FreeBSD, it will be useful to anyone changing the code. To use it,
+do something like
+
+cd /usr/src/contrib/awk
+ln -s /path/to/new/awk gawk
+cd test && make
+
+jraynard@freebsd.org 26 Sept 1997
diff --git a/contrib/awk/FUTURES b/contrib/awk/FUTURES
new file mode 100644
index 0000000..13a312c
--- /dev/null
+++ b/contrib/awk/FUTURES
@@ -0,0 +1,108 @@
+This file lists future projects and enhancements for gawk. Items are listed
+in roughly the order they will be done for a given release. This file is
+mainly for use by the developers to help keep themselves on track, please
+don't bug us too much about schedules or what all this really means.
+
+With the 3.0 release, we are acknowledging that awk is not PERL, nor should
+it become PERL. (To paraphrase Dennis Ritchie, "If you want PERL, you
+know where to get it.")
+
+The focus on the future is thus narrowed to performance and functional
+enhancements, with only minor plans for significant new features.
+
+For 3.0
+=======
+ DONE: Move to autoconf-based configure system.
+
+ DONE: Allow RS to be a regexp.
+
+ DONE: RT variable to hold text of record terminator
+
+ DONE: split() with null string as third arg to split up strings
+
+ DONE: Analogously, setting FS="" would split the input record into
+ individual characters.
+
+ DONE: Generalize IGNORECASE
+ - any value makes it work, not just numeric non-zero
+ - make it apply to *all* string comparisons
+
+ DONE: Incorporate newer dfa.c and regex.c
+
+ DONE: Go to POSIX regexps
+
+ DONE: Make regex + dfa less dependent on gawk header file includes
+
+ DONE: Source code formatting cleaned up and regularized
+
+ DONE: Clean up code by isolating system-specific functions in
+ separate files.
+
+ DONE: General sub function:
+ gensub(pat, sub, global_flag[, line])
+ that returns the substituted string and allows \1 etc.
+ in the sub string.
+
+ DONE: Add AWKPATH to ENVIRON if it's not there
+
+ DONE: Undertake significant directory reorganization.
+
+ DONE: Extensive manual cleanup:
+ Use of texinfo 2.0 features
+ Lots more examples
+ Document posix regexps
+ Document all of the above.
+
+In 3.1
+======
+ A PROCINFO array to replace /dev/pid, /dev/user, et al.
+
+ DONE: Use mmap to read input files on systems that support it.
+
+ Add `abort' statement a la Thompson awk.
+
+ Consider removing use of and/or need for the protos.h file.
+
+ Use a new or improved dfa.
+
+ Integrate GNU NLS support.
+
+ Bring out hooks for NLS support into gawk itself.
+
+ DBM storage of awk arrays. Try to allow multiple dbm packages.
+
+ Use GNU malloc.
+
+ Use rx instead of regex.
+
+ DONE: Do a reference card.
+
+ ? Have strftime() pay attention to the value of ENVIRON["TZ"]
+
+ Additional manual features:
+ Document use of dbm arrays
+ Document NLS support
+ ? Add exercises
+ ? Add an error messages section to the manual
+ ? A section on where gawk is bounded
+ regex
+ i/o
+ sun fp conversions
+
+For 3.2
+=======
+ Add a lint check if the return value of a function is used but
+ the function did not supply a value.
+
+ Do an optimization pass over parse tree?
+
+ Make awk '/foo/' files... run at egrep speeds
+
+For 4.x:
+========
+
+Create a gawk compiler?
+
+Create a gawk-to-C translator? (or C++??)
+
+Provide awk profiling and debugging.
diff --git a/contrib/awk/INSTALL b/contrib/awk/INSTALL
new file mode 100644
index 0000000..a2c8722
--- /dev/null
+++ b/contrib/awk/INSTALL
@@ -0,0 +1,181 @@
+Basic Installation
+==================
+
+ These are generic installation instructions.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, a file
+`config.cache' that saves the results of its tests to speed up
+reconfiguring, and a file `config.log' containing compiler output
+(useful mainly for debugging `configure').
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If at some point `config.cache'
+contains results you don't want to keep, you may remove or edit it.
+
+ The file `configure.in' is used to create `configure' by a program
+called `autoconf'. You only need `configure.in' if you want to change
+it or regenerate `configure' using a newer version of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system. If you're
+ using `csh' on an old version of System V, you might need to type
+ `sh ./configure' instead to prevent `csh' from trying to execute
+ `configure' itself.
+
+ Running `configure' takes a while. While running, it prints some
+ messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+Compilers and Options
+=====================
+
+ Some systems require unusual options for compilation or linking that
+the `configure' script does not know about. You can give `configure'
+initial values for variables by setting them in the environment. Using
+a Bourne-compatible shell, you can do that on the command line like
+this:
+ CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure
+
+Or on systems that have the `env' program, you can do it like this:
+ env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure
+
+Compiling For Multiple Architectures
+====================================
+
+ You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you must use a version of `make' that
+supports the `VPATH' variable, such as GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ If you have to use a `make' that does not support the `VPATH'
+variable, you have to compile the package for one architecture at a time
+in the source code directory. After you have installed the package for
+one architecture, use `make distclean' before reconfiguring for another
+architecture.
+
+Installation Names
+==================
+
+ By default, `make install' will install the package's files in
+`/usr/local/bin', `/usr/local/man', etc. You can specify an
+installation prefix other than `/usr/local' by giving `configure' the
+option `--prefix=PATH'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+give `configure' the option `--exec-prefix=PATH', the package will use
+PATH as the prefix for installing programs and libraries.
+Documentation and other data files will still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=PATH' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+ Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+ There may be some features `configure' can not figure out
+automatically, but needs to determine by the type of host the package
+will run on. Usually `configure' can figure that out, but if it prints
+a message saying it can not guess the host type, give it the
+`--host=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name with three fields:
+ CPU-COMPANY-SYSTEM
+
+See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the host type.
+
+ If you are building compiler tools for cross-compiling, you can also
+use the `--target=TYPE' option to select the type of system they will
+produce code for and the `--build=TYPE' option to select the type of
+system on which you are compiling the package.
+
+Sharing Defaults
+================
+
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Operation Controls
+==================
+
+ `configure' recognizes the following options to control how it
+operates.
+
+`--cache-file=FILE'
+ Use and save the results of the tests in FILE instead of
+ `./config.cache'. Set FILE to `/dev/null' to disable caching, for
+ debugging `configure'.
+
+`--help'
+ Print a summary of the options to `configure', and exit.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made.
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`--version'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`configure' also accepts some other, not widely useful, options.
+
diff --git a/contrib/awk/LIMITATIONS b/contrib/awk/LIMITATIONS
new file mode 100644
index 0000000..05e8bc4
--- /dev/null
+++ b/contrib/awk/LIMITATIONS
@@ -0,0 +1,16 @@
+This file describes limits of gawk on a Unix system (although it
+is variable even then). Non-Unix systems may have other limits.
+
+# of fields in a record: MAX_LONG
+Length of input record: MAX_INT
+Length of output record: unlimited
+Size of a field: MAX_INT
+Size of a printf string: MAX_INT
+Size of a literal string: MAX_INT
+Characters in a character class: 2^(# of bits per byte)
+# of file redirections: unlimited
+# of pipe redirections: min(# of processes per user, # of open files)
+double-precision floating point
+Length of source line: unlimited
+Number of input records in one file: MAX_LONG
+Number of input records total: MAX_LONG
diff --git a/contrib/awk/NEWS b/contrib/awk/NEWS
new file mode 100644
index 0000000..2a3e7fe
--- /dev/null
+++ b/contrib/awk/NEWS
@@ -0,0 +1,1802 @@
+Changes from 3.0.2 to 3.0.3
+---------------------------
+
+The horrendous per-record memory leak introduced in 3.0.1 is gone, finally.
+
+The `amiga' directory is now gone; Amiga support is now entirely handled
+by the POSIX support.
+
+Win32 support has been added in the `pc' directory. See `README_d/README.pc'
+for more info.
+
+The mmap changes are disabled in io.c, and will be removed entirely
+in the next big release. They were an interesting experiment that just
+really didn't work in practice.
+
+A minor memory leak that occurred when using `next' from within a
+function has also been fixed.
+
+Problems with I/O from sub-processes via a pipe are now gone.
+
+Using "/dev/pid" and the other special /dev files no longer causes a core dump.
+
+The files regex.h, regex.c, getopt.h, getopt.c, and getopt1.c have been
+merged with the versions in GNU libc. Thanks to Ulrich Drepper for his help.
+
+Some new undocumented features have been added. Use the source, Luke!
+It is not clear yet whether these will ever be fully supported.
+
+Array performance should be much better for very very large arrays. "Virtual
+memory required, real memory helpful."
+
+builtin.c:do_substr rationalized, again.
+
+The --re-interval option now works as advertised.
+
+The license text on some of the missing/* files is now generic.
+
+Lots more new test cases.
+
+Lots of other small bugs fixed, see the ChangeLog files for details.
+
+Changes from 3.0.1 to 3.0.2
+---------------------------
+
+Gawk now uses autoconf 2.12.
+
+strftime now behaves correctly if passed an empty format string or if
+the string formats to an empty result string.
+
+Several minor compilation and installation problems have been fixed.
+
+Minor page break issues in the user's guide have been fixed.
+
+Lexical errors no longer repeat ad infinitum.
+
+Changes from 3.0.0 to 3.0.1
+---------------------------
+
+Troff source for a handy-dandy five color reference card is now provided.
+Thanks to SSC for their macros.
+
+Gawk now behaves like Unix awk and mawk, in that newline acts as white
+space for separating fields and for split(), by default. In posix mode,
+only space and tab separate fields. The documentation has been updated to
+reflect this.
+
+Tons and tons of small bugs fixed and new tests added, see the ChangeLogs.
+
+Lots fewer compile time warnings from gcc -Wall. Remaining ones aren't
+worth fixing.
+
+Gawk now pays some attention to the locale settings.
+
+Fixes to gsub to catch several corner cases.
+
+The `print' statement now evaluates all expressions first, and then
+prints them. This leads to less surprising behaviour if any expression has
+output side effects.
+
+Miscellaneous improvements in regex.h and regex.c.
+
+Gawk will now install itself as gawk-M.N.P in $(bindir), and link
+`gawk' to it. This makes it easy to have multiple versions of gawk
+simultaneously. It will also now install itself as `awk' in $(bindir)
+if there is no `awk' there. This is in addition to installing itself as
+`gawk'. This change benefits the Hurd, and possibly other systems. One
+day, gawk will drop the `g', but not yet.
+
+`--posix' turns on interval expressions. Gawk now matches its documentation.
+
+`close(FILENAME)' now does something meaningful.
+
+Field management code in field.c majorly overhauled, several times.
+
+The gensub code has been fixed, several bugs are now gone.
+
+Gawk will use mmap for data file input if it is available.
+
+The printf/sprintf code has been improved.
+
+Minor issues in Makefile setup worked on and improved.
+
+builtin.c:do_substr rationalized.
+
+Regex matching fixed so that /+[0-9]/ now matches the leading +.
+
+For building on vms, the default compiler is now DEC C rather than VAX C.
+
+Changes from 2.15.6 to 3.0.0
+----------------------------
+
+Fixed spelling of `Programming' in the copyright notice in all the files.
+
+New --re-interval option to turn on interval expressions. They're off
+by default, except for --posix, to avoid breaking old programs.
+
+Passing regexp constants as parameters to user defined functions now
+generates a lint warning.
+
+Several obscure regexp bugs fixed; alas, a small number remain.
+
+The manual has been thoroughly revised. It's now almost 50% bigger than
+it used to be.
+
+The `+' modifier in printf is now reset correctly for each item.
+
+The do_unix variable is now named do_traditional.
+
+Handling of \ in sub and gsub rationalized (somewhat, see the manual for
+the gory [and I do mean gory] details).
+
+IGNORECASE now uses ISO 8859-1 Latin-1 instead of straight ASCII. See the
+source for how to revert to pure ASCII.
+
+--lint will now warn if an assignment occurs in a conditional context.
+This may become obnoxious enough to need turning off in the future, but
+"it seemed like a good idea at the time."
+
+%hf and %Lf are now diagnosed as invalid in printf, just like %lf.
+
+Gawk no longer incorrectly closes stdin in child processes used in
+input pipelines.
+
+For integer formats, gawk now correctly treats the precision as the
+number of digits to print, not the number of characters.
+
+gawk is now much better at catching the use of scalar values when
+arrays are needed, both in function calls and the `x in y' constructs.
+
+New gensub function added. See the manual.
+
+If do_traditional is true, octal and hex escapes in regexp constants are
+treated literally. This matches historical behavior.
+
+yylex/nextc fixed so that even null characters can be included
+in the source code.
+
+do_format now handles cases where a format specifier doesn't end in
+a control letter. --lint reports an error.
+
+strftime() now uses a default time format equivalent to that of the
+Unix date command, thus it can be called with no arguments.
+
+Gawk now catches functions that are used but not defined at parse time
+instead of at run time. (This is a lint error, making it fatal could break
+old code.)
+
+Arrays that max out are now handled correctly.
+
+Integer formats outside the range of an unsigned long are now detected
+correctly using the SunOS 4.x cc compiler.
+
+--traditional option added as new preferred name for --compat, in keeping
+with GCC.
+
+--lint-old option added, so that warnings about things not in old awk
+are only given if explicitly asked for.
+
+`next file' has changed to one word, `nextfile'. `next file' is still
+accepted but generates a lint warning. `next file' will go away eventually.
+
+Gawk with --lint will now notice empty source files and empty data files.
+
+Amiga support using the Unix emulation added. Thanks to fnf@ninemoons.com.
+
+test/Makefile is now "parallel-make safe".
+
+Gawk now uses POSIX regexps + GNU regex ops by default. --posix goes to
+pure posix regexps, and --compat goes to traditional Unix regexps. However,
+interval expressions, even though specified by POSIX, are turned off by
+default, to avoid breaking old code.
+
+IGNORECASE now applies to string comparison as well as regexp operations.
+
+The AT&T Bell Labs Research awk fflush builtin function is now supported.
+fflush is extended to flush stdout if no arg and everything if given
+the null string as an argument.
+
+If RS is more than one character, it is treated as a regular expression
+and records are delimited accordingly. The variable RT is set to the record
+terminator string. This is disabled in compatibility mode.
+
+If FS is set to the null string (or the third arg. of split() is the null
+string), splitting is done at every single character. This is disabled in
+compatibility mode.
+
+Gawk now uses the Autoconf generated configure script, doing away with all
+the config/* files and the machinery that went with them. The Makefile.in
+has also changed accordingly, complete with all the standard GNU Makefile
+targets. (Non-unix systems may still have their own config.h and Makefile;
+see the appropriate README_d/README.* and/or subdirectory.)
+
+The source code has been cleaned up somewhat and the formatting improved.
+
+Changes from 2.15.5 to 2.15.6
+-----------------------------
+
+Copyrights updated on all changed files.
+
+test directory enhanced with four new tests.
+
+Gawk now generates a warning for \x without following hexadecimal digits.
+In this case, it returns 'x', not \0.
+
+Several fixes in main.c related to variable initialization:
+ CONVFMT has a default value
+ resetup is called before initializing variables
+ the varinit table fixed up a bit (see the comments)
+
+gawk.1 updated with new BUG REPORTS section.
+
+A plain `print' inside a BEGIN or END now generates a lint warning (awk.y).
+
+Small fix in iop.c:get_a_record to avoid reading uninitialized memory.
+
+awk.y:yylex now does a better job of handling things if the source file
+does not end in a newline. Probably there is more work to be done.
+
+Memory leaks fixed in awk.y, particularly in cases of duplicate function
+parameters. Also, calling a function doesn't leak memory during parsing.
+
+Empty function bodies are now allowed (awk.y).
+
+Gawk now detects duplicate parameter names in functions (awk.y).
+
+New function `error' in msg.c added for use from awk.y.
+
+eval.c:r_get_lhs now checks if its argument is a parameter on the stack,
+and pulls down the real variable. This catches more 'using an array as
+a scalar' kinds of errors.
+
+main.c recovers C alloca space after parsing, this is important for
+bison-based parsers. re.c recovers C alloca space after calling research().
+[Changes from Pat Rankin]
+
+builtin.c now declares the random() related functions based on
+RANDOM_MISSING from config.h. [Suggested by Pat Rankin]
+
+awk.h now handles alloca correctly for HP-UX. [Kaveh Ghazi]
+
+regex.h and config/cray60 updated for Unicos 8.0. [Hal Peterson]
+
+Fixed re.c and dfa.c so that gawk no longer leaks memory when using
+lots of dynamic regexps.
+
+Removed dependency on signed chars from `idx' variable in awk.h. Gawk
+now passes its test suite if compiled with `gcc -fno-signed-char'.
+
+Fixed warning on close in io.c to go under lint control. Too many people
+have complained about the spurious message, particularly when closing a
+child pipeline early.
+
+Gawk now correctly handles RS = "" when input is from a terminal
+(iop.c:get_a_record).
+
+Config file added for GNU.
+
+gawk 'BEGIN { exit 1 } ; END { exit }' now exits 1, as it should
+(eval.c:interpret).
+
+sub and gsub now follow posix, \ escapes both & and \. Each \ must
+be doubled initially in the program to get it into the string.
+Thanks to Mike Brennan for pointing this out (builtin.c:sub_common).
+
+If FS is "", gawk behaves like mawk and nawk, making the whole record be $1.
+Yet Another Dark Corner. Sigh (field.c:def_parse_field).
+
+Gawk now correctly recomputes string values for numbers if CONVFMT has
+changed (awk.h:force_string, node.c:r_force_string).
+
+A regexp of the form `/* this looks like a comment but is not */' will
+now generate a warning from --lint (awk.y).
+
+Gawk will no longer core dump if given an empty input file (awk.y:get_src_buf,
+iop.c:optimal_bufsize).
+
+A printf format of the form %lf is handled correctly. The `l' generates
+a lint warning (builtin.c:format_tree) [Thanks to Mark Moraes].
+
+Lynxos config file added.
+
+`continue' outside a loop treated as `next' only in compatibility mode,
+instead of by default; recent att nawk chokes on this now. `break'
+outside a loop now treated as `next' in compatibility mode (eval.c).
+
+Bug fix in string concatenation, an arbitrary number of expressions
+are allowed (eval.c).
+
+$1 += $2 now works correctly (eval.c).
+
+Changing IGNORECASE no longer resets field-splitting to FS if it was
+using FIELDWIDTHS (eval.c, field.c).
+
+Major enhancement: $0 and NF for last record read are now preserved
+into the END rule (io.c).
+
+Regexp fixes:
+ /./ now matches a newline (regex.h)
+ ^ and $ match beginning and end of string only, not any embedded
+ newlines (re.c)
+ regex.c should compile and work ok on 64-bit mips/sgi machines
+
+Changes from 2.15.4 to 2.15.5
+-----------------------------
+
+FUTURES file updated and re-arranged some with more rational schedule.
+
+Many prototypes handled better for ANSI C in protos.h.
+
+getopt.c updated somewhat.
+
+test/Makefile now removes junk directory, `bardargtest' renamed `badargs.'
+
+Bug fix in iop.c for RS = "". Eat trailing newlines off of record separator.
+
+Bug fix in Makefile.bsd44, use leading tab in actions.
+
+Fix in field.c:set_FS for FS == "\\" and IGNORECASE != 0.
+
+Config files updated or added:
+ cray60, DEC OSF/1 2.0, Utek, sgi405, next21, next30, atari/config.h,
+ sco.
+
+Fix in io.c for ENFILE as well as EMFILE, update decl of groupset to
+include OSF/1.
+
+Rationalized printing as integers if numbers are outside the range of a long.
+Changes to node.c:force_string and builtin.c.
+
+Made internal NF, NR, and FNR variables longs instead of ints.
+
+Add LIMITS_H_MISSING stuff to config.in and awk.h, and default defs for
+INT_MAX and LONG_MAX, if no limits.h file. Add a standard decl of
+the time() function for __STDC__. From ghazi@noc.rutgers.edu.
+
+Fix tree_eval in awk.h and r_tree_eval in eval.c to deal better with
+function parameters, particularly ones that are arrays.
+
+Fix eval.c to print out array names of arrays used in scalar contexts.
+
+Fix eval.c in interpret to zero out source and sourceline initially. This
+does a better job of providing source file and line number information.
+
+Fix to re_parse_field in field.c to not use isspace when RS = "", but rather
+to explicitly look for blank and tab.
+
+Fix to sc_parse_field in field.c to catch the case of the FS character at the
+end of a record.
+
+Lots of miscellaneous bug fixes for memory leaks, courtesy Mark Moraes,
+also fixes for arrays.
+
+io.c fixed to warn about lack of explicit closes if --lint.
+
+Updated missing/strftime.c to match posted strftime 6.2.
+
+Bug fix in builtin.c, in case of non-match in sub_common.
+
+Updated constant used for division in builtin.c:do_rand for DEC Alpha
+and CRAY Y-MP.
+
+POSIXLY_CORRECT in the environment turns on --posix (fixed in main.c).
+
+Updated srandom prototype and calls in builtin.c.
+
+Fix awk.y to enforce posix semantics of unary +: result is numeric.
+
+Fix array.c to not rearrange the hash chain upon finding an index in
+the array. This messed things up in cases like:
+ for (index1 in array) {
+ blah
+ if (index2 in array) # blew away the for
+ stuff
+ }
+
+Fixed spelling errors in the man page.
+
+Fixes in awk.y so that
+ gawk '' /path/to/file
+will work without core dumping or finding parse errors.
+
+Fix main.c so that --lint will fuss about an empty program.
+Yet another fix for argument parsing in the case of unrecognized options.
+
+Bug fix in dfa.c to not attempt to free null pointers.
+
+Bug fix in builtin.c to only use DEFAULT_G_PRECISION for %g or %G.
+
+Bug fix in field.c to achieve call by value semantics for split.
+
+Changes from 2.15.3 to 2.15.4
+-----------------------------
+
+Lots of lint fixes, and do_sprintf made mostly ANSI C compatible.
+
+Man page updated and edited.
+
+Copyrights updated.
+
+Arrays now grow dynamically, initially scaling up by an order of magnitude
+ and then doubling, up to ~ 64K. This should keep gawk's performance
+ graceful under heavy load.
+
+New `delete array' feature added. Only documented in the man page.
+
+Switched to dfa and regex suites from grep-2.0. These offer the ability to
+ move to POSIX regexps in the next release.
+
+Disabled GNU regex ops.
+
+Research awk -m option now recognized. It does nothing in gawk, since gawk
+ has no static limits. Only documented in the man page.
+
+New bionic (faster, better, stronger than before) hashing function.
+
+Bug fix in argument handling. `gawk -X' now notices there was no program.
+ Additional bug fixes to make --compat and --lint work again.
+
+Many changes for systems where sizeof(int) != sizeof(void *).
+
+Add explicit alloca(0) in io.c to recover space from C alloca.
+
+Fixed file descriptor leak in io.c.
+
+The --version option now follows the GNU coding standards and exits.
+
+Fixed several prototypes in protos.h.
+
+Several tests updated. On Solaris, warn that the out? tests will fail.
+
+Configuration files for SunOS with cc and Solaris 2.x added.
+
+Improved error messages in awk.y on gawk extensions if do_unix or do_compat.
+
+INSTALL file added.
+
+Fixed Atari Makefile and several VMS specific changes.
+
+Better conversion of numbers to strings on systems with broken sprintfs.
+
+Changes from 2.15.2 to 2.15.3
+-----------------------------
+
+Increased HASHSIZE to a decent number, 127 was way too small.
+
+FILENAME is now the null string in a BEGIN rule.
+
+Argument processing fixed for invalid options and missing arguments.
+
+This version will build on VMS. This included a fix to close all files
+ and pipes opened with redirections before closing stdout and stderr.
+
+More getpgrp() defines.
+
+Changes for BSD44: <sys/param.h> in io.c and Makefile.bsd44.
+
+All directories in the distribution are now writable.
+
+Separated LDFLAGS and CFLAGS in Makefile. CFLAGS can now be overridden by
+ user.
+
+Make dist now builds compressed archives ending in .gz and runs doschk.
+
+Amiga port.
+
+New getopt.c fixes Alpha OSF/1 problem.
+
+Make clean now removes possible test output.
+
+Improved algorithm for multiple adjacent string concatenations leads to
+ performance improvements.
+
+Fix nasty bug whereby command-line assignments, both with -v and at run time,
+ could create variables with syntactically illegal names.
+
+Fix obscure bug in printf with %0 flag and filling.
+
+Add a lint check for substr if provided length exceeds remaining characters
+ in string.
+
+Update atari support.
+
+PC support enhanced to include support for both DOS and OS/2. (Lots more
+ #ifdefs. Sigh.)
+
+Config files for Hitachi Unix and OSF/1, courtesy of Yoko Morishita
+ (morisita@sra.co.jp)
+
+Changes from 2.15.1 to 2.15.2
+-----------------------------
+
+Additions to the FUTURES file.
+
+Document undefined order of output when using both standard output
+ and /dev/stdout or any of the /dev output files that gawk emulates in
+ the absence of OS support.
+
+Clean up the distribution generation in Makefile.in: the info files are
+ now included, the distributed files are marked read-only and patched
+ distributions are now unpacked in a directory named with the patch level.
+
+Changes from 2.15 to 2.15.1
+---------------------------
+
+Close stdout and stderr before all redirections on program exit. This allows
+ detection of write errors and also fixes the messages test on Solaris 2.x.
+
+Removed YYMAXDEPTH define in awk.y which was limiting the parser stack depth.
+
+Changes to config/bsd44, Makefile.bsd44 and configure to bring it into line
+ with the BSD4.4 release.
+
+Changed Makefile to use prefix, exec_prefix, bindir etc.
+
+make install now installs info files.
+
+make install now sets permissions on installed files.
+
+Make targets added: uninstall, distclean, mostlyclean and realclean.
+
+Added config.h to cleaner and clobber make targets.
+
+Changes to config/{hpux8x,sysv3,sysv4,ultrix41} to deal with alloca().
+
+Change to getopt.h for portability.
+
+Added more special cases to the getpgrp() call.
+
+Added README.ibmrt-aos and config/ibmrt-aos.
+
+Changes from 2.14 to 2.15
+---------------------------
+
+Command-line source can now be mixed with library functions.
+
+ARGIND variable tracks index in ARGV of FILENAME.
+
+GNU style long options in addition to short options.
+
+Plan 9 style special files interpreted by gawk:
+ /dev/pid
+ /dev/ppid
+ /dev/pgrpid
+ /dev/user
+ $1 = getuid
+ $2 = geteuid
+ $3 = getgid
+ $4 = getegid
+ $5 ... $NF = getgroups if supported
+
+ERRNO variable contains error string if getline or close fails.
+
+Very old options -a and -e have gone away.
+
+Inftest has been removed from the default target in test/Makefile -- the
+ results were too machine specific and resulted in too many false alarms.
+
+A README.amiga has been added.
+
+The "too many arguments supplied for format string" warning message is only
+ in effect under the lint option.
+
+Code improvements in dfa.c.
+
+Fixed all reported bugs:
+
+ Writes are checked for failure (such as full filesystem).
+
+ Stopped (at least some) runaway error messages.
+
+ gsub(/^/, "x") does the right thing for $0 of 0, 1, or more length.
+
+ close() on a command being piped to a getline now works properly.
+
+ The input record will no longer be freed upon an explicit close()
+ of the input file.
+
+ A NUL character in FS now works.
+
+ In a substitute, \\& now means a literal backslash followed by what
+ was matched.
+
+ Integer overflow of substring length in substr() is caught.
+
+ An input record without a newline termination is handled properly.
+
+ In io.c, check is against only EMFILE so that system file table
+ is not filled.
+
+ Renamed all files with names longer than 14 characters.
+
+ Escaped characters in regular expressions were being lost when
+ IGNORECASE was used.
+
+ Long source lines were not being handled properly.
+
+ Sourcefiles that ended in a tab but no newline were bombing.
+
+ Patterns that could match zero characters in split() were not working
+ properly.
+
+ The parsedebug option was not working.
+
+ The grammar was being a bit too lenient, allowing some very dubious
+ programs to pass.
+
+ Compilation with DEBUG defined now works.
+
+ A variable read in with getline was not being treated as a potential
+ number.
+
+ Array subscripts were not always of string type.
+
+
+Changes from 2.13.2 to 2.14
+---------------------------
+
+Updated manual!
+
+Added "next file" to skip efficiently to the next input file.
+
+Fixed potential of overflowing buffer in do_sprintf().
+
+Plugged small memory leak in sub_common().
+
+EOF on a redirect is now "sticky" -- it can only be cleared by close()ing
+ the pipe or file.
+
+Now works if used via a #! /bin/gawk line at the top of an executable file
+ when that line ends with whitespace.
+
+Added some checks to the grammar to catch redefinition of builtin functions.
+ This could eventually be the basis for an extension to allow redefining
+ functions, but in the mean time it's a good error catching facility.
+
+Negative integer exponents now work.
+
+Modified do_system() to make sure it had a non-null string to be passed
+ to system(3). Thus, system("") will flush any pending output but not go
+ through the overhead of forking an un-needed shell.
+
+A fix to floating point comparisons so that NaNs compare right on IEEE systems.
+
+Added code to make sure we're not opening directories for reading and such.
+
+Added code to do better diagnoses of weird or null file names.
+
+Allow continue outside of a loop, unless in strict posix mode. Lint option
+ will issue warning.
+
+New missing/strftime.c. There has been one change that affects gawk. Posix
+ now defines a %V conversion so the vms conversion has been changed to %v.
+ If this version is used with gawk -Wlint and they use %V in a call to
+ strftime, they'll get a warning.
+
+Error messages now conform to GNU standard (I hope).
+
+Changed comparisons to conform to the description found in the file POSIX.
+ This is inconsistent with the current POSIX draft, but that is broken.
+ Hopefully the final POSIX standard will conform to this version.
+ (Alas, this will have to wait for 1003.2b, which will be a revision to
+ the 1003.2 standard. That standard has been frozen with the broken
+ comparison rules.)
+
+The length of a string was a short and now is a size_t.
+
+Updated VMS help.
+
+Added quite a few new tests to the test suite and deleted many due to lack of
+ written releases. Test output is only removed if it is identical to the
+ "good" output.
+
+Fixed a couple of bugs for reference to $0 when $0 is "" -- particularly in
+ a BEGIN block.
+
+Fixed premature freeing in construct "$0 = $0".
+
+Removed the call to wait_any() in gawk_popen(), since on at least some systems,
+ if gawk's input was from a pipe, the predecessor process in the pipe was a
+ child of gawk and this caused a deadlock.
+
+Regexp can (once again) match a newline, if given explicitly.
+
+nextopen() makes sure file name is null terminated.
+
+Fixed VMS pipe simulation. Improved VMS I/O performance.
+
+Catch . used in variable names.
+
+Fixed bug in getline without redirect from a file -- it was quitting after the
+ first EOF, rather than trying the next file.
+
+Fixed bug in treatment of backslash at the end of a string -- it was bombing
+ rather than doing something sensible. It is not clear what this should mean,
+ but for now I issue a warning and take it as a literal backslash.
+
+Moved setting of regexp syntax to before the option parsing in main(), to
+ handle things like -v FS='[.,;]'
+
+Fixed bug when NF is set by user -- fields_arr must be expanded if necessary
+ and "new" fields must be initialized.
+
+Fixed several bugs in [g]sub() for no match found or the match is 0-length.
+
+Fixed bug where in gsub() a pattern anchored at the beginning would still
+ substitute throughout the string.
+
+make test does not assume that . is in PATH.
+
+Fixed bug when a field beyond the end of the record was requested after
+ $0 was altered (directly or indirectly).
+
+Fixed bug for assignment to field beyond end of record -- the assigned value
+ was not found on subsequent reference to that field.
+
+Fixed bug for FS a regexp and it matches at the end of a record.
+
+Fixed memory leak for an array local to a function.
+
+Fixed hanging of pipe redirection to getline
+
+Fixed coredump on access to $0 inside BEGIN block.
+
+Fixed treatment of RS = "". It now parses the fields correctly and strips
+ leading whitespace from a record if FS is a space.
+
+Fixed faking of /dev/stdin.
+
+Fixed problem with x += x
+
+Use of scalar as array and vice versa is now detected.
+
+IGNORECASE now obeyed for FS (even if FS is a single alphabetic character).
+
+Switch to GPL version 2.
+
+Renamed awk.tab.c to awktab.c for MSDOS and VMS tar programs.
+
+Renamed this file (CHANGES) to NEWS.
+
+Use fmod() instead of modf() and provide FMOD_MISSING #define to undo
+ this change.
+
+Correct the volatile declarations in eval.c.
+
+Avoid errant closing of the file descriptors for stdin, stdout and stderr.
+
+Be more flexible about where semi-colons can occur in programs.
+
+Check for write errors on all output, not just on close().
+
+Eliminate the need for missing/{strtol.c,vprintf.c}.
+
+Use GNU getopt and eliminate missing/getopt.c.
+
+More "lint" checking.
+
+
+Changes from 2.13.1 to 2.13.2
+-----------------------------
+
+Toward conformity with GNU standards, configure is a link to mkconf, the latter
+ to disappear in the next major release.
+
+Update to config/bsd43.
+
+Added config/apollo, config/msc60, config/cray2-50, config/interactive2.2
+
+sgi33.cc added for compilation using cc rather than gcc.
+
+Ultrix41 now propagates to config.h properly -- as part of a general
+ mechanism in configure for kludges -- #define anything from a config file
+ just gets tacked onto the end of config.h -- to be used sparingly.
+
+Got rid of an unnecessary and troublesome declaration of vprintf().
+
+Small improvement in locality of error messages.
+
+Try to diagnose use of array as scalar and vice versa -- to be improved in
+ the future.
+
+Fix for last bug fix for Cray division code--sigh.
+
+More changes to test suite to explicitly use sh. Also get rid of
+ a few generated files.
+
+Fixed off-by-one bug in string concatenation code.
+
+Fix for use of array that is passed in from a previous function parameter.
+ Addition to test suite for above.
+
+A number of changes associated with changing NF and access to fields
+ beyond the end of the current record.
+
+Change to missing/memcmp.c to avoid seg. fault on zero length input.
+
+Updates to test suite (including some inadvertently left out of the last patch)
+ to invoke sh explicitly (rather than rely on #!/bin/sh) and remove some
+ junk files. test/chem/good updated to correspond to bug fixes.
+
+Changes from 2.13.0 to 2.13.1
+-----------------------------
+
+More configs and PORTS.
+
+Fixed bug wherein a simple division produced an erroneous FPE, caused by
+ the Cray division workaround -- that code is now #ifdef'd only for
+ Cray *and* fixed.
+
+Fixed bug in modulus implementation -- it was very close to the above
+ code, so I noticed it.
+
+Fixed portability problem with limits.h in missing.c
+
+Fixed portability problem with tzname and daylight -- define TZNAME_MISSING
+ if strftime() is missing and tzname is also.
+
+Better support for Latin-1 character set.
+
+Fixed portability problem in test Makefile.
+
+Updated PROBLEMS file.
+
+=============================== gawk-2.13 released =========================
+Changes from 2.12.42 to 2.12.43
+-------------------------------
+
+Typo in awk.y
+
+Fixed up strftime.3 and added doc. for %V.
+
+Changes from 2.12.41 to 2.12.42
+-------------------------------
+
+Fixed bug in devopen() -- if you had write permission in /dev,
+ it would just create /dev/stdout etc.!!
+
+Final (?) VMS update.
+
+Make NeXT use GFMT_WORKAROUND
+
+Fixed bug in sub_common() for substitute on zero-length match. Improved the
+ code a bit while I was at it.
+
+Fixed grammar so that $i++ parses as ($i)++
+
+Put support/* back in the distribution (didn't I already do this?!)
+
+Changes from 2.12.40 to 2.12.41
+-------------------------------
+
+VMS workaround for broken %g format.
+
+Changes from 2.12.39 to 2.12.40
+-------------------------------
+
+Minor man page update.
+
+Fixed latent bug in redirect().
+
+Changes from 2.12.38 to 2.12.39
+-------------------------------
+
+Updates to test suite -- remove dependence on changing gawk.1 man page.
+
+Changes from 2.12.37 to 2.12.38
+-------------------------------
+
+Fixed bug in use of *= without whitespace following.
+
+VMS update.
+
+Updates to man page.
+
+Option handling updates in main.c
+
+test/manyfiles redone and added to bigtest.
+
+Fixed latent (on Sun) bug in handling of save_fs.
+
+Changes from 2.12.36 to 2.12.37
+-------------------------------
+
+Update REL in Makefile-dist. Incorporate test suite into main distribution.
+
+Minor fix in regtest.
+
+Changes from 2.12.35 to 2.12.36
+-------------------------------
+
+Release takes on dual personality -- 2.12.36 and 2.13.0 -- any further
+ patches before public release won't count for 2.13, although they will for
+ 2.12 -- be careful to avoid confusion! patchlevel.h will be the last thing
+ to change.
+
+Cray updates to deal with arithmetic problems.
+
+Minor test suite updates.
+
+Fixed latent bug in parser (freeing memory).
+
+Changes from 2.12.34 to 2.12.35
+-------------------------------
+
+VMS updates.
+
+Flush stdout at top of err() and stderr at bottom.
+
+Fixed bug in eval_condition() -- it wasn't testing for MAYBE_NUM and
+ doing the force_number().
+
+Included the missing manyfiles.awk and a new test to catch the above bug which
+ I am amazed wasn't already caught by the test suite -- it's pretty basic.
+
+Changes from 2.12.33 to 2.12.34
+-------------------------------
+
+Atari updates -- including bug fix.
+
+More VMS updates -- also nuke vms/version.com.
+
+Fixed bug in handling of large numbers of redirections -- it was probably never
+ tested before (blush!).
+
+Minor rearrangement of code in r_force_number().
+
+Made chem and regtest tests a bit more portable (Ultrix again).
+
+Added another test -- manyfiles -- not invoked under any other test -- very Unix
+ specific.
+
+Rough beginning of LIMITATIONS file -- need my AWK book to complete it.
+
+Changes from 2.12.32 to 2.12.33
+-------------------------------
+
+Expunge debug.? from various files.
+
+Remove vestiges of Floor and Ceil kludge.
+
+Special case integer division -- mainly for Cray, but maybe someone else
+ will benefit.
+
+Workaround for iop_close closing an output pipe descriptor on Cray --
+ not conditional since I think it may fix a bug on SGI as well and I don't
+ think it can hurt elsewhere.
+
+Fixed memory leak in assoc_lookup().
+
+Small cleanup in test suite.
+
+Changes from 2.12.31 to 2.12.32
+-------------------------------
+
+Nuked debug.c and debugging flag -- there are better ways.
+
+Nuked version.sh and version.c in subdirectories.
+
+Fixed bug in handling of IGNORECASE.
+
+Fixed bug when FIELDWIDTHS was set via -v option.
+
+Fixed (obscure) bug when $0 is assigned a numerical value.
+
+Fixed so that escape sequences in command-line assignments work (as it already
+ said in the comment).
+
+Added a few cases to test suite.
+
+Moved support/* back into distribution.
+
+VMS updates.
+
+Changes from 2.12.30 to 2.12.31
+-------------------------------
+
+Cosmetic manual page changes.
+
+Updated sunos3 config.
+
+Small changes in test suite including renaming files over 14 chars. in length.
+
+Changes from 2.12.29 to 2.12.30
+-------------------------------
+
+Bug fix for many string concatenations in a row.
+
+Changes from 2.12.28 to 2.12.29
+-------------------------------
+
+Minor cleanup in awk.y
+
+Minor VMS update.
+
+Minor atari update.
+
+Changes from 2.12.27 to 2.12.28
+-------------------------------
+
+Got rid of the debugging goop in eval.c -- there are better ways.
+
+Sequent port.
+
+VMS changes left out of the last patch -- sigh! config/vms.h renamed
+ to config/vms-conf.h.
+
+Fixed missing/tzset.c
+
+Removed use of gcvt() and GCVT_MISSING -- turns out it was no faster than
+ sprintf("%g") and caused all sorts of portability headaches.
+
+Tuned get_field() -- it was unnecessarily parsing the whole record on reference
+ to $0.
+
+Tuned interpret() a bit in the rule_node loop.
+
+In r_force_number(), worked around bug in Uglix strtod() and got rid of
+ ugly do{}while(0) at Michal's urging.
+
+Replaced do_deref() and deref with unref(node) -- much cleaner and a bit faster.
+
+Got rid of assign_number() -- contrary to comment, it was no faster than
+ just making a new node and freeing the old one.
+
+Replaced make_number() and tmp_number() with macros that call mk_number().
+
+Changed freenode() and newnode() into macros -- the latter is getnode()
+ which calls more_nodes() as necessary.
+
+Changes from 2.12.26 to 2.12.27
+-------------------------------
+
+Completion of Cray 2 port (includes a kludge for floor() and ceil()
+ that may go or be changed -- I think that it may just be working around
+ a bug in chem that is being tweaked on the Cray).
+
+More VMS updates.
+
+Moved kludge over yacc's insertion of malloc and realloc declarations
+ from protos.h to the Makefile.
+
+Added a lisp interpreter in awk to the test suite. (Invoked under
+ bigtest.)
+
+Cleanup in r_force_number() -- I had never gotten around to a thorough
+ profile of the cache code and it turns out to be not worth it.
+
+Performance boost -- do lazy force_number()'ing for fields etc. i.e.
+ flag them (MAYBE_NUM) and call force_number only as necessary.
+
+Changes from 2.12.25 to 2.12.26
+-------------------------------
+
+Rework of regexp stuff so that dynamic regexps have reasonable
+ performance -- string used for compiled regexp is stored and
+ compared to new string -- if same, no recompilation is necessary.
+ Also, very dynamic regexps cause dfa-based searching to be turned
+ off.
+
+Code in dev_open() is back to returning fileno(std*) rather than
+ dup()ing it. This will be documented. Sorry for the run-around
+ on this.
+
+Minor atari updates.
+
+Minor vms update.
+
+Missing file from MSDOS port.
+
+Added warning (under lint) if third arg. of [g]sub is a constant and
+ handle it properly in the code (i.e. return how many matches).
+
+Changes from 2.12.24 to 2.12.25
+-------------------------------
+
+MSDOS port.
+
+Non-consequential changes to regexp variables in preparation for
+ a more serious change to fix a serious performance problem.
+
+Changes from 2.12.23 to 2.12.24
+-------------------------------
+
+Fixed bug in output flushing introduced a few patches back. This caused
+ serious performance losses.
+
+Changes from 2.12.22 to 2.12.23
+-------------------------------
+
+Accidentally left config/cray2-60 out of last patch.
+
+Added some missing dependencies to Makefile.
+
+Cleaned up mkconf a bit; made yacc the default parser (no alloca needed,
+ right?); added rs6000 hook for signed characters.
+
+Made regex.c with NO_ALLOCA undefined work.
+
+Fixed bug in dfa.c for systems where free(NULL) bombs.
+
+Deleted a few cant_happen()'s that *really* can't happen.
+
+Changes from 2.12.21 to 2.12.22
+-------------------------------
+
+Added to config stuff the ability to choose YACC rather than bison.
+
+Fixed CHAR_UNSIGNED in config.h-dist.
+
+Second arg. of strtod() is char ** rather than const char **.
+
+stackb is now initially malloc()'ed since it may be realloc()'ed.
+
+VMS updates.
+
+Added SIZE_T_MISSING to config stuff and a default typedef to awk.h.
+ (Maybe it is not needed on any current systems??)
+
+re_compile_pattern()'s size is now size_t unconditionally.
+
+Changes from 2.12.20 to 2.12.21
+-------------------------------
+
+Corrected missing/gcvt.c.
+
+Got rid of use of dup2() and thus DUP_MISSING.
+
+Updated config/sgi33.
+
+Turned on (and fixed) in cmp_nodes() the behaviour that I *hope* will be in
+ POSIX 1003.2 for relational comparisons.
+
+Small updates to test suite.
+
+Changes from 2.12.19 to 2.12.20
+-------------------------------
+
+Sloppy, sloppy, sloppy!! I didn't even try to compile the last two
+ patches. This one fixes goofs in regex.c.
+
+Changes from 2.12.18 to 2.12.19
+-------------------------------
+
+Cleanup of last patch.
+
+Changes from 2.12.17 to 2.12.18
+-------------------------------
+
+Makefile renamed to Makefile-dist.
+
+Added alloca() configuration to mkconf. (A bit kludgey.) Just
+ add a single line containing ALLOCA_PW, ALLOCA_S or ALLOCA_C
+ to the appropriate config file to have Makefile-dist edited
+ accordingly.
+
+Reorganized output flushing to correspond with new semantics of
+ devopen() on "/dev/std*" etc.
+
+Fixed rest of last goof!!
+
+Save and restore errno in do_pathopen().
+
+Miscellaneous atari updates.
+
+Get rid of the trailing comma in the NODETYPE definition (Cray
+ compiler won't take it).
+
+Try to make the use of `const' consistent since Cray compiler is
+ fussy about that. See the changes to `basename' and `myname'.
+
+It turns out that, according to section 3.8.3 (Macro Replacement)
+ of the ANSI Standard: ``If there are sequences of preprocessing
+ tokens within the list of arguments that would otherwise act as
+ preprocessing directives, the behavior is undefined.'' That means
+ that you cannot count on the behavior of the declaration of
+ re_compile_pattern in awk.h, and indeed the Cray compiler chokes on it.
+
+Replaced alloca with malloc/realloc/free in regex.c. It was much simpler
+ than expected. (Inside NO_ALLOCA for now -- by default no alloca.)
+
+Added a configuration file, config/cray60, for Unicos-6.0.
+
+Changes from 2.12.16 to 2.12.17
+-------------------------------
+
+Ooops. Goofed signal use in last patch.
+
+Changes from 2.12.15 to 2.12.16
+-------------------------------
+
+RENAMED *_dir to just * (e.g. missing_dir).
+
+Numerous VMS changes.
+
+Proper inclusion of atari and vms files.
+
+Added experimental (ifdef'd out) RELAXED_CONTINUATION and DEFAULT_FILETYPE
+ -- please comment on these!
+
+Moved pathopen() to io.c (sigh).
+
+Put local directory ahead in default AWKPATH.
+
+Added facility in mkconf to echo comments on stdout: lines beginning
+ with "#echo " will have the remainder of the line echoed when mkconf is run.
+ Any lines starting with "#" will otherwise be treated as comments. The
+ intent is to be able to say:
+ "#echo Make sure you uncomment alloca.c in the Makefile"
+ or the like.
+
+Prototype fix for V.4
+
+Fixed version_string to not print leading @(#).
+
+Fixed FIELDWIDTHS to work with strict (turned out to be easy).
+
+Fixed conf for V.2.
+
+Changed semantics of /dev/fd/n to be like on real /dev/fd.
+
+Several configuration and updates in the makefile.
+
+Updated manpage.
+
+Include tzset.c and system.c from missing_dir that were accidentally left out of
+ the last patch.
+
+Fixed bug in cmdline variable assignment -- arg was getting freed(!) in
+ call to variable.
+
+Backed out of parse-time constant folding for now, until I can figure out
+ how to do it right.
+
+Fixed devopen() so that getline <"-" works.
+
+Changes from 2.12.14 to 2.12.15
+-------------------------------
+
+Changed config/* to a condensed form that can be used with mkconf to generate
+ a config.h from config.h-dist -- much easier to maintain. Please check
+ carefully against what you had before for a particular system and report
+ any problems. vms.h remains separate since the stuff at the bottom
+ didn't quite fit the mkconf model -- hopefully cleared up later.
+
+Fixed bug in grammar -- didn't allow function definition to be separated from
+ other rules by a semi-colon.
+
+VMS fix to #includes in missing.c -- should we just be including awk.h?
+
+Updated README for texinfo.tex version.
+
+Updating of copyright in all .[chy] files.
+
+Added but commented out Michal's fix to strftime.
+
+Added tzset() emulation based on Rick Adams' code. Added TZSET_MISSING to
+ config.h-dist.
+
+Added strftime.3 man page for missing_dir
+
+More posix: func, **, **= don't work in -W posix
+
+More lint: ^, ^= not in old awk
+
+gawk.1: removed ref to -DNO_DEV_FD, other minor updating.
+
+Style change: pushbak becomes pushback() in yylex().
+
+Changes from 2.12.13 to 2.12.14
+-------------------------------
+
+Better (?) organization of awk.h -- attempt to keep all system dependencies
+ near the top and move some of the non-general things out of the config.h
+ files.
+
+Change to handling of SYSTEM_MISSING.
+
+Small change to ultrix config.
+
+Do "/dev/fd/*" etc. checking at runtime.
+
+First pass at VMS port.
+
+Improvements to error handling (when lexeme spans buffers).
+
+Fixed backslash handling -- why didn't I notice this sooner?
+
+Added programs from book to test suite and new target "bigtest" to Makefile.
+
+Changes from 2.12.12 to 2.12.13
+-------------------------------
+
+Recognize OFS and ORS specially so that OFS = 9 works without efficiency hit.
+ Took advantage of opportunity to tune do_print*() for about 10% win on a
+ print with 5 args (i.e. small but significant).
+
+Somewhat pervasive changes to reconcile CONVFMT vs. OFMT.
+
+Better initialization of builtin vars.
+
+Make config/* consistent wrt STRTOL_MISSING.
+
+Small portability improvement to alloca.s
+
+Improvements to lint code in awk.y
+
+Replaced strtol() with a better one by Chris Torek.
+
+Changes from 2.12.11 to 2.12.12
+-------------------------------
+
+Added PORTS file to record successful ports.
+
+Added #define const to nothing if not STDC and added const to strtod() header.
+
+Added * to printf capabilities and partially implemented ' ' and '+' (has an
+ effect for %d only, silently ignored for other formats). I'm afraid that's
+ as far as I want to go before I look at a complete replacement for
+ do_sprintf().
+
+Added warning for /regexp/ on LHS of MATCHOP.
+
+Changes from 2.12.10 to 2.12.11
+-------------------------------
+
+Small Makefile improvements.
+
+Some remaining nits from the NeXT port.
+
+Got rid of bcopy() define in awk.h -- not needed anymore (??)
+
+Changed private in builtin.c -- it is special on Sequent.
+
+Added subset implementation of strtol() and STRTOL_MISSING.
+
+A little bit of cleanup in debug.c, dfa.c.
+
+Changes from 2.12.9 to 2.12.10
+------------------------------
+
+Redid compatibility checking and checking for # of args.
+
+Removed all references to variables[] from outside awk.y, in preparation
+ for a more abstract interface to the symbol table.
+
+Got rid of a remaining use of bcopy() in regex.c.
+
+Changes from 2.12.8 to 2.12.9
+-----------------------------
+
+Portability improvements for atari, next and decstation.
+
+Bug fix in substr() -- wasn't handling 3rd arg. of -1 properly.
+
+Manpage updates.
+
+Moved support from src release to doc release.
+
+Updated FUTURES file.
+
+Added some "lint" warnings.
+
+Changes from 2.12.7 to 2.12.8
+-----------------------------
+
+Changed time() to systime().
+
+Changed warning() in snode() to fatal().
+
+strftime() now defaults second arg. to current time.
+
+Changes from 2.12.6 to 2.12.7
+-----------------------------
+
+Fixed bug in sub_common() involving inadequate allocation of a buffer.
+
+Added some missing files to the Makefile.
+
+Changes from 2.12.5 to 2.12.6
+-----------------------------
+
+Fixed bug wherein non-redirected getline could call iop_close() just
+ prior to a call from do_input().
+
+Fixed bug in handling of /dev/stdout and /dev/stderr.
+
+Changes from 2.12.4 to 2.12.5
+-----------------------------
+
+Updated README and support directory.
+
+Changes from 2.12.3 to 2.12.4
+-----------------------------
+
+Updated CHANGES and TODO (should have been done in previous 2 patches).
+
+Changes from 2.12.2 to 2.12.3
+-----------------------------
+
+Brought regex.c and alloca.s into line with current FSF versions.
+
+Changes from 2.12.1 to 2.12.2
+-----------------------------
+
+Portability improvements; mostly moving system prototypes out of awk.h
+
+Introduction of strftime.
+
+Use of CONVFMT.
+
+Changes from 2.12 to 2.12.1
+-----------------------------
+
+Consolidated treatment of command-line assignments (thus correcting the
+-v treatment).
+
+Rationalized builtin-variable handling into a table-driven process, thus
+simplifying variable() and eliminating spc_var().
+
+Fixed bug in handling of command-line source that ended in a newline.
+
+Simplified install() and lookup().
+
+Did away with double-mallocing of identifiers and now free second and later
+instances of a name, after the first gets installed into the symbol table.
+
+Treat IGNORECASE specially, simplifying a lot of code, and allowing
+checking against strict conformance only on setting it, rather than on each
+pattern match.
+
+Fixed regexp matching when IGNORECASE is non-zero (broken when dfa.c was
+added).
+
+Fixed bug where $0 was not being marked as valid, even after it was rebuilt.
+This caused mangling of $0.
+
+
+Changes from 2.11.1 to 2.12
+-----------------------------
+
+Makefile:
+
+Portability improvements in Makefile.
+Move configuration stuff into config.h
+
+FSF files:
+
+Synchronized alloca.[cs] and regex.[ch] with FSF.
+
+array.c:
+
+Rationalized hash routines into one with a different algorithm.
+delete() now works if the array is a local variable.
+Changed interface of assoc_next() and avoided dereferencing past the end of the
+ array.
+
+awk.h:
+
+Merged non-prototype and prototype declarations in awk.h.
+Expanded tree_eval #define to short-circuit more calls of r_tree_eval().
+
+awk.y:
+
+Delinted some of the code in the grammar.
+Fixed and improved some of the error message printing.
+Changed to accommodate unlimited length source lines.
+Line continuation now works as advertised.
+Source lines can be arbitrarily long.
+Refined grammar hacks so that /= assignment works. Regular expressions
+ starting with /= are recognized at the beginning of a line, after && or ||
+ and after ~ or !~. More contexts can be added if necessary.
+Fixed IGNORECASE (multiple scans for backslash).
+Condensed expression_lists in array references.
+Detect and warn for correct # args in builtin functions -- call most of them
+ with a fixed number (i.e. fill in defaults at parse-time rather than at
+ run-time).
+Load ENVIRON only if it is referenced (detected at parse-time).
+Treat NF, FS, RS, NR, FNR specially at parse time, to improve run time.
+Fold constant expressions at parse time.
+Do make_regexp() on third arg. of split() at parse time if it is a constant.
+
+builtin.c:
+
+srand() returns 0 the first time called.
+Replaced alloca() with malloc() in do_sprintf().
+Fixed setting of RSTART and RLENGTH in do_match().
+Got rid of get_{one,two,three} and allowance for variable # of args. at
+ run-time -- this is now done at parse-time.
+Fixed latent bug in [g]sub whereby changes to $0 would never get made.
+Rewrote much of sub_common() for simplicity and performance.
+Added ctime() and time() builtin functions (unless -DSTRICT). ctime() returns
+ a time string like the C function, given the number of seconds since the epoch
+ and time() returns the current time in seconds.
+do_sprintf() now checks for mismatch between format string and number of
+ arguments supplied.
+
+dfa.c
+
+This is borrowed (almost unmodified) from GNU grep to provide faster searches.
+
+eval.c
+
+Node_var, Node_var_array and Node_param_list handled from macro rather
+ than in r_tree_eval().
+Changed cmp_nodes() to not do a force_number() -- this, combined with a
+ force_number() on ARGV[] and ENVIRON[] brings it into line with other awks
+Greatly simplified cmp_nodes().
+Separated out Node_NF, Node_FS, Node_RS, Node_NR and Node_FNR in get_lhs().
+All adjacent string concatenations now done at once.
+
+field.c
+
+Added support for FIELDWIDTHS.
+Fixed bug in get_field() whereby changes to a field were not always
+ properly reflected in $0.
+Reordered tests in parse_field() so that reference off the end of the buffer
+ doesn't happen.
+set_FS() now sets *parse_field i.e. routine to call depending on type of FS.
+It also does make_regexp() for FS if needed. get_field() passes FS_regexp
+ to re_parse_field(), as does do_split().
+Changes to set_field() and set_record() to avoid malloc'ing and free'ing the
+ field nodes repeatedly. The fields now just point into $0 unless they are
+ assigned to another variable or changed. force_number() on the field is
+ *only* done when the field is needed.
+
+gawk.1
+
+Fixed troff formatting problem on .TP lines.
+
+io.c
+
+Moved some code out into iop.c.
+Output from pipes and system() calls is properly synchronized.
+Status from pipe close properly returned.
+Bug in getline with no redirect fixed.
+
+iop.c
+
+This file contains a totally revamped get_a_record and associated code.
+
+main.c
+
+Command line programs no longer use a temporary file.
+Therefore, tmpnam() no longer required.
+Deprecated -a and -e options -- they will go away in the next release,
+ but for now they cause a warning.
+Moved -C, -V, -c options to -W ala posix.
+Added -W posix option: throw out \x
+Added -W lint option.
+
+
+node.c
+
+force_number() now allows pure numerics to have leading whitespace.
+Added make_string facility to optimize case of adding an already malloc'd
+ string.
+Cleaned up and simplified do_deref().
+Fixed bug in handling of stref==255 in do_deref().
+
+re.c
+
+contains the interface to regexp code
+
+Changes from 2.11.1 to FSF version of same
+------------------------------------------
+Thu Jan 4 14:19:30 1990 Jim Kingdon (kingdon at albert)
+
+ * Makefile (YACC): Add -y to bison part.
+
+ * missing.c: Add #include <stdio.h>.
+
+Sun Dec 24 16:16:05 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * Makefile: Add (commented out) default defines for Sony News.
+
+ * awk.h: Move declaration of vprintf so it will compile when
+ -DVPRINTF_MISSING is defined.
+
+Mon Nov 13 18:54:08 1989 Robert J. Chassell (bob at apple-gunkies.ai.mit.edu)
+
+ * gawk.texinfo: changed @-commands that are not part of the
+ standard, currently released texinfmt.el to those that are.
+ Otherwise, only people with the as-yet unreleased makeinfo.c can
+ format this file.
+
+Changes from 2.11beta to 2.11.1 (production)
+--------------------------------------------
+
+Went from "beta" to production status!!!
+
+Now flushes stdout before closing pipes or redirected files to
+synchronize output.
+
+MS-DOS changes added in.
+
+Signal handler return type parameterized in Makefile and awk.h and
+some lint removed. debug.c cleaned up.
+
+Fixed FS splitting to never match null strings, per book.
+
+Correction to the manual's description of FS.
+
+Some compilers break on char *foo = "string" + 4 so fixed version.sh and
+main.c.
+
+Changes from 2.10beta to 2.11beta
+---------------------------------
+
+This release fixes all reported bugs that we could reproduce. Probably
+some of the changes are not documented here.
+
+The next release will probably not be a beta release!
+
+The most important change is the addition of the -nostalgia option. :-)
+
+The documentation has been improved and brought up-to-date.
+
+There has been a lot of general cleaning up of the code that is not otherwise
+documented here. There has been a movement toward using standard-conforming
+library routines and providing them (in missing.d) for systems lacking them.
+Improved (hopefully) configuration through Makefile modifications and missing.c.
+In particular, straightened out confusion over vprintf #defines, declarations
+etc.
+
+Deleted RCS log comments from source, to reduce source size by about one third.
+Most of them were horribly out-of-date, anyway.
+
+Renamed source files to reflect (for the most part) their contents.
+
+More and improved error messages. Cleanup and fixes to yyerror().
+String constants are not altered in input buffer, so error messages come out
+better. Fixed usage message. Make use of ANSI C strerror() function
+(provided).
+
+Plugged many more memory leaks. The memory consumption is now quite
+reasonable over a wide range of programs.
+
+Uses volatile declaration if STDC > 0 to avoid problems due to longjmp.
+
+New -a and -e options to use awk or egrep style regexps, respectively,
+since POSIX says awk should use egrep regexps. Default is -a.
+
+Added -v option for setting variables before the first file is encountered.
+Version information now uses -V and copyleft uses -C.
+
+Added a patchlevel.h file and its use for -V and -C.
+
+Append_right() optimized for major improvement to programs with a *lot*
+of statements.
+
+Operator precedence has been corrected to match draft Posix.
+
+Tightened up grammar for builtin functions so that only length
+may be called without arguments or parentheses.
+
+/regex/ is now a normal expression that can appear in any expression
+context.
+
+Allow /= to begin a regexp. Allow ..[../..].. in a regexp.
+
+Allow empty compound statements ({}).
+
+Made return and next illegal outside a function and in BEGIN/END respectively.
+
+Division by zero is now illegal and causes a fatal error.
+
+Fixed exponentiation so that x ^ 0 and x ^= 0 both return 1.
+
+Fixed do_sqrt, do_log, and do_exp to do argument/return checking and
+print an error message, per the manual.
+
+Fixed main to catch SIGSEGV to get source and data file line numbers.
+
+Fixed yyerror to print the ^ at the beginning of the bad token, not the end.
+
+Fix to substr() builtin: it was failing if the arguments
+weren't already strings.
+
+Added new node value flag NUMERIC to indicate that a variable is
+purely a number as opposed to type NUM which indicates that
+the node's numeric value is valid. This is set in make_number(),
+tmp_number and r_force_number() when appropriate and used in
+cmp_nodes(). This fixed a bug in comparison of variables that had
+numeric prefixes. The new code uses strtod() and eliminates is_a_number().
+A simple strtod() is provided for systems lacking one. It does no
+overflow checking, so could be improved.
+
+Simplification and efficiency improvement in force_string.
+
+Added performance tweak in r_force_number().
+
+Fixed a bug with nested loops and break/continue in functions.
+
+Fixed inconsistency in handling of empty fields when $0 has to be rebuilt.
+Happens to simplify rebuild_record().
+
+Cleaned up the code associated with opening a pipe for reading. Gawk
+now has its own popen routine (gawk_popen) that allocates an IOBUF
+and keeps track of the pid of the child process. gawk_pclose
+marks the appropriate child as defunct in the right struct redirect.
+
+Cleaned up and fixed close_redir().
+
+Fixed an obscure bug to do with redirection. Intermingled ">" and ">>"
+redirects did not output in a predictable order.
+
+Improved handling of output buffering: now all print[f]s redirected to a tty
+or pipe are flushed immediately and non-redirected output to a tty is flushed
+before the next input record is read.
+
+Fixed a bug in get_a_record() where bcopy() could have copied over
+a random pointer.
+
+Fixed a bug when RS="" and records separated by multiple blank lines.
+
+Got rid of SLOWIO code which was out-of-date anyway.
+
+Fix in get_field() for case where $0 is changed and then $(n) are
+changed and then $0 is used.
+
+Fixed infinite loop on failure to open file for reading from getline.
+Now handles redirect file open failures properly.
+
+Filenames such as /dev/stdin now allowed on the command line as well as
+in redirects.
+
+Fixed so that gawk '$1' where $1 is a zero tests false.
+
+Fixed parsing so that `RLENGTH -1' parses the same as `RLENGTH - 1',
+for example.
+
+The return from a user-defined function now defaults to the Null node.
+This fixes a core-dump-causing bug when the return value of a function
+is used and that function returns no value.
+
+Now catches floating point exceptions to avoid core dumps.
+
+Bug fix for deleting elements of an array -- under some conditions, it was
+deleting more than one element at a time.
+
+Fix in AWKPATH code for running off the end of the string.
+
+Fixed handling of precision in *printf calls. %0.2d now works properly,
+as does %c. [s]printf now recognizes %i and %X.
+
+Fixed a bug in printing of very large (>240) strings.
+
+Cleaned up erroneous behaviour for RS == "".
+
+Added IGNORECASE support to index().
+
+Simplified and fixed newnode/freenode.
+
+Fixed reference to $(anything) in a BEGIN block.
+
+Eliminated use of USG rand48().
+
+Bug fix in force_string for machines with 16-bit ints.
+
+Replaced use of mktemp() with tmpnam() and provided a partial implementation of
+the latter for systems that don't have it.
+
+Added a portability check for includes in io.c.
+
+Minor portability fix in alloc.c plus addition of xmalloc().
+
+Portability fix: on UMAX4.2, st_blksize is zero for a pipe, thus breaking
+iop_alloc() -- fixed.
+
+Workaround for compiler bug on Sun386i in do_sprintf.
+
+More and improved prototypes in awk.h.
+
+Consolidated C escape parsing code into one place.
+
+strict flag is now turned on only when invoked with compatibility option.
+It now applies to fewer things.
+
+Changed cast of f._ptr in vprintf.c from (unsigned char *) to (char *).
+Hopefully this is right for the systems that use this code (I don't).
+
+Support for pipes under MSDOS added.
diff --git a/contrib/awk/PORTS b/contrib/awk/PORTS
new file mode 100644
index 0000000..c6cbb83
--- /dev/null
+++ b/contrib/awk/PORTS
@@ -0,0 +1,36 @@
+A recent version of gawk has been successfully compiled and run "make test"
+on the following:
+
+Using cc:
+ Dec Alpha OSF 4.0
+ HP9000/755 HP-UX 9.01
+ IBM PowerPC AIX 4.1.4.0
+ SCO Unix (OpenServer 5)
+ SGI IRIX 4.0.5
+ SGI IRIX 5.3
+ SGI IRIX 6.1
+ SGI IRIX 6.2
+ SunOS 4.1.3
+ SunOS 5.5
+ IBM SP2 AIX 4.1
+
+Other systems:
+ DEC Alpha Linux/AXP
+ DEC Alpha OSF/1 3.2
+ DECstation 5000 ULTRIX 4.3
+ HP 9000/735 HP-UX 10.01
+ IBM RS/6000 AIX 3.2
+ IBM SP2 AIX 4.1
+ Intel x86 DOS (compiler: djgpp v2, emx+gcc,
+ and MSC 6.00A, 7, and 8)
+ Intel x86 Linux 2.0.27
+ Intel x86 Linux 2.1.36
+ Intel x86 OS/2 (compiler: emx+gcc)
+ NeXT Turbostation Mach 3.3
+ SGI Indigo/2 IRIX 5.3
+ SGI O2 IRIX 6.2
+ SGI PowerChallenge IRIX 6.1
+ Sun SPARC Linux 2.0.22
+ Sun SPARC Solaris 2.5
+ Sun SPARC Solaris 2.5.1
+ Sun SPARC SunOS 4.1.3
diff --git a/contrib/awk/POSIX.STD b/contrib/awk/POSIX.STD
new file mode 100644
index 0000000..ac8e1ab
--- /dev/null
+++ b/contrib/awk/POSIX.STD
@@ -0,0 +1,109 @@
+August 1995:
+
+Although the published 1003.2 standard contained the incorrect
+comparison rules of 11.2 draft as described below, no actual implementation
+of awk (that I know of) actually used those rules.
+
+A revision of the 1003.2 standard is in progress, and in the May 1995
+draft, the rules were fixed (based on my submissions for interpretation
+requests) to match the description given below. Thus, the next version
+of the standard will have a correct description of the comparison
+rules.
+
+June 1992:
+
+Right now, the numeric vs. string comparisons are screwed up in draft
+11.2. What prompted me to check it out was the note in gnu.bug.utils
+which observed that gawk was doing the comparison $1 == "000"
+numerically. I think that we can agree that intuitively, this should
+be done as a string comparison. Version 2.13.2 of gawk follows the
+current POSIX draft. Following is how I (now) think this
+stuff should be done.
+
+1. A numeric literal or the result of a numeric operation has the NUMERIC
+ attribute.
+
+2. A string literal or the result of a string operation has the STRING
+ attribute.
+
+3. Fields, getline input, FILENAME, ARGV elements, ENVIRON elements and the
+ elements of an array created by split() that are numeric strings
+ have the STRNUM attribute. Otherwise, they have the STRING attribute.
+ Uninitialized variables also have the STRNUM attribute.
+
+4. Attributes propagate across assignments, but are not changed by
+ any use. (Although a use may cause the entity to acquire an additional
+ value such that it has both a numeric and string value -- this leaves the
+ attribute unchanged.)
+
+When two operands are compared, either string comparison or numeric comparison
+may be used, depending on the attributes of the operands, according to the
+following (symmetric) matrix:
+
+ +----------------------------------------------
+ | STRING NUMERIC STRNUM
+--------+----------------------------------------------
+ |
+STRING | string string string
+ |
+NUMERIC | string numeric numeric
+ |
+STRNUM | string numeric numeric
+--------+----------------------------------------------
+
+So, the following program should print all OKs.
+
+echo '0e2 0a 0 0b
+0e2 0a 0 0b' |
+$AWK '
+NR == 1 {
+ num = 0
+ str = "0e2"
+
+ print ++test ": " ( (str == "0e2") ? "OK" : "OOPS" )
+ print ++test ": " ( ("0e2" != 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ("0" != $2) ? "OK" : "OOPS" )
+ print ++test ": " ( ("0e2" == $1) ? "OK" : "OOPS" )
+
+ print ++test ": " ( (0 == "0") ? "OK" : "OOPS" )
+ print ++test ": " ( (0 == num) ? "OK" : "OOPS" )
+ print ++test ": " ( (0 != $2) ? "OK" : "OOPS" )
+ print ++test ": " ( (0 == $1) ? "OK" : "OOPS" )
+
+ print ++test ": " ( ($1 != "0") ? "OK" : "OOPS" )
+ print ++test ": " ( ($1 == num) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != $1) ? "OK" : "OOPS" )
+ print ++test ": " ( ($3 == 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ($3 == $1) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != $4) ? "OK" : "OOPS" ) # 15
+}
+{
+ a = "+2"
+ b = 2
+ if (NR % 2)
+ c = a + b
+ print ++test ": " ( (a != b) ? "OK" : "OOPS" ) # 16 and 22
+
+ d = "2a"
+ b = 2
+ if (NR % 2)
+ c = d + b
+ print ++test ": " ( (d != b) ? "OK" : "OOPS" )
+
+ print ++test ": " ( (d + 0 == b) ? "OK" : "OOPS" )
+
+ e = "2"
+ print ++test ": " ( (e == b "") ? "OK" : "OOPS" )
+
+ a = "2.13"
+ print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" )
+
+ a = "2.130000"
+ print ++test ": " ( (a != 2.13) ? "OK" : "OOPS" )
+
+ if (NR == 2) {
+ CONVFMT = "%.6f"
+ print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" )
+ }
+}'
diff --git a/contrib/awk/PROBLEMS b/contrib/awk/PROBLEMS
new file mode 100644
index 0000000..8f7d954
--- /dev/null
+++ b/contrib/awk/PROBLEMS
@@ -0,0 +1,10 @@
+This is a list of known problems in gawk 3.0.
+Hopefully they will all be fixed in the next major release of gawk.
+
+Please keep in mind that the code is still undergoing significant evolution.
+
+1. The interactions with the lexer and yyerror need reworking. It is possible
+ to get line numbers that are one line off if --compat or --posix is
+ true and either `nextfile' or `delete array' is used.
+
+ Really the whole lexical analysis stuff needs reworking.
diff --git a/contrib/awk/README b/contrib/awk/README
new file mode 100644
index 0000000..890b16d
--- /dev/null
+++ b/contrib/awk/README
@@ -0,0 +1,94 @@
+README:
+
+This is GNU Awk 3.0.3. It should be upwardly compatible with the Bell
+Labs research version of awk. It is almost completely compliant with
+the 1993 POSIX 1003.2 standard for awk. (See the note below about POSIX.)
+
+Patches 1 through 3 just fix bugs -- see NEWS and ChangeLog for details.
+
+See the file INSTALL for installation instructions.
+
+Known problems are given in the PROBLEMS file. Work to be done is
+described briefly in the FUTURES file. Verified ports are listed in
+the PORTS file. Changes in this version are summarized in the NEWS file.
+Please read the LIMITATIONS and ACKNOWLEDGMENT files.
+
+Read the file POSIX.STD for a discussion of how the standard says comparisons
+should be done vs. how they really should be done and how gawk does them.
+
+To format the documentation with TeX, use at least version 2.151 of
+texinfo.tex. There is a usable copy of texinfo.tex in the doc directory.
+
+The Info file now comes with the distribution.
+
+The man page is up to date.
+
+INSTALLATION:
+
+Check whether there is a system-specific README file for your system
+under the `README_d' directory. If there's something there that you
+should have read and didn't, and you bug me about it, I'm going to yell
+at you.
+
+See the file INSTALL.
+
+If you have neither bison nor yacc, use the awktab.c file here. It was
+generated with bison, and has no proprietary code in it. (Note that modifying
+awk.y without bison or yacc will be difficult, at best. You might want
+to get a copy of bison from the FSF too.)
+
+If you have an MS-DOS or OS/2 system, use the stuff in the `pc' directory.
+Similarly, there are separate directories for Ataris, Amigas, and VMS.
+
+Appendix B of The GNU Awk User's Guide discusses configuration in detail. The
+configuration process is now based on Autoconf, so the whole business
+should be considerably simpler than it was in gawk 2.X.Y.
+
+After successful compilation, do 'make check' to run a small test
+suite. There should be no output from the 'cmp' invocations except in
+the cases where there are small differences in floating point values,
+and possibly in the case of strftime. Several of the tests ignore
+errors on purpose; those are not a problem. If there are other
+differences, please investigate and report the problem.
+
+PRINTING THE MANUAL
+
+The 'doc' directory contains a recent version of texinfo.tex, which will be
+necessary for printing the manual. Use `make dvi' to get a DVI file from
+the manual. In the `doc' directory, use `make postscript' to get a PostScript
+version of the manual.
+
+BUG REPORTS AND FIXES (Un*x systems):
+
+Please coordinate changes through Arnold Robbins. In particular, see
+the section in the manual on reporting bugs. Note that comp.lang.awk is
+about the worst place to post a gawk bug report. Please, use the mechanisms
+outlined in the manual.
+
+Arnold Robbins
+INTERNET: arnold@gnu.ai.mit.edu
+
+BUG REPORTS AND FIXES (non-Unix ports):
+
+MS-DOS:
+ Scott Deifik
+ scottd@amgen.com
+
+ Darrel Hankerson
+ hankedr@mail.auburn.edu
+
+VMS:
+ Pat Rankin
+ rankin@eql.caltech.edu
+
+Atari ST:
+ Michal Jaegermann
+ michal@gortel.phys.ualberta.ca
+
+OS/2:
+ Kai Uwe Rommel
+ rommel@ars.de
+
+Amiga:
+ Fred Fish
+ fnf@ninemoons.com
diff --git a/contrib/awk/README_d/README.FIRST b/contrib/awk/README_d/README.FIRST
new file mode 100644
index 0000000..2ebd5b7
--- /dev/null
+++ b/contrib/awk/README_d/README.FIRST
@@ -0,0 +1,21 @@
+Sat Feb 18 23:07:55 EST 1995
+
+Starting with 2.15.6, gawk will preserve the value of NF and $0 for
+the last record read into the END rule(s). This is important to you
+if your program uses
+
+ print
+
+in an END rule to mean
+
+ print ""
+
+(i.e., print nothing). Examine your awk programs carefully to make sure
+that they use `print ""' instead of `print', otherwise you will get
+strange results.
+
+If you send me email about this, without having read this
+file, I will yell at you.
+
+Arnold Robbins
+arnold@gnu.ai.mit.edu
diff --git a/contrib/awk/acconfig.h b/contrib/awk/acconfig.h
new file mode 100644
index 0000000..05f3c61
--- /dev/null
+++ b/contrib/awk/acconfig.h
@@ -0,0 +1,36 @@
+/*
+ * acconfig.h -- configuration definitions for gawk.
+ */
+
+/*
+ * Copyright (C) 1995-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+@TOP@
+
+#undef HAVE_STRINGIZE /* can use ANSI # operator in cpp */
+#undef REGEX_MALLOC /* use malloc instead of alloca in regex.c */
+#undef SPRINTF_RET /* return type of sprintf */
+#undef BITOPS /* bitwise ops (undocumented feature) */
+#undef NONDECDATA /* non-decimal input data (undocumented feature) */
+
+@BOTTOM@
+
+#include <custom.h> /* overrides for stuff autoconf can't deal with */
diff --git a/contrib/awk/array.c b/contrib/awk/array.c
new file mode 100644
index 0000000..b178cd2
--- /dev/null
+++ b/contrib/awk/array.c
@@ -0,0 +1,526 @@
+/*
+ * array.c - routines for associative arrays.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 - 97 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/*
+ * Tree walks (``for (iggy in foo)'') and array deletions use expensive
+ * linear searching. So what we do is start out with small arrays and
+ * grow them as needed, so that our arrays are hopefully small enough,
+ * most of the time, that they're pretty full and we're not looking at
+ * wasted space.
+ *
+ * The decision is made to grow the array if the average chain length is
+ * ``too big''. This is defined as the total number of entries in the table
+ * divided by the size of the array being greater than some constant.
+ */
+
+#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */
+
+#include "awk.h"
+
+static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1)); /* search one hash chain for subs */
+static void grow_table P((NODE *symbol)); /* create the table or move it to the next size */
+
+/*
+ * concat_exp --- build the string form of a subscript expression
+ *
+ * A tree that is not a Node_expression_list is evaluated and returned in
+ * string form.  For a list, each element is evaluated in order and the
+ * string values are joined with the string held in SUBSEP_node; a list
+ * of one element is returned without copying.  When a join takes place
+ * the result is a freshly allocated string node flagged TEMP.
+ */
+
+NODE *
+concat_exp(tree)
+register NODE *tree;
+{
+	register NODE *piece;
+	char *buf;
+	char *tail;
+	size_t bufsize;
+	int used;
+	size_t seplen;
+	char *sep;
+
+	if (tree->type != Node_expression_list)
+		return force_string(tree_eval(tree));
+
+	piece = force_string(tree_eval(tree->lnode));
+	if (tree->rnode == NULL)
+		return piece;
+
+	seplen = SUBSEP_node->lnode->stlen;
+	sep = SUBSEP_node->lnode->stptr;
+
+	/* room for the first piece, one separator and the terminators */
+	bufsize = piece->stlen + seplen + 2;
+	emalloc(buf, char *, bufsize, "concat_exp");
+	memcpy(buf, piece->stptr, piece->stlen+1);
+	tail = buf + piece->stlen;
+	free_temp(piece);
+
+	tree = tree->rnode;
+	while (tree != NULL) {
+		/* separator first; a one-character SUBSEP skips the memcpy */
+		if (seplen != 1) {
+			memcpy(tail, sep, seplen+1);
+			tail += seplen;
+		} else
+			*tail++ = *sep;
+
+		piece = force_string(tree_eval(tree->lnode));
+		bufsize += piece->stlen + seplen;
+
+		/* the buffer may move when it grows, so re-derive tail */
+		used = tail - buf;
+		erealloc(buf, char *, bufsize, "concat_exp");
+		tail = buf + used;
+
+		memcpy(tail, piece->stptr, piece->stlen+1);
+		tail += piece->stlen;
+		free_temp(piece);
+
+		tree = tree->rnode;
+	}
+
+	piece = make_str_node(buf, tail - buf, ALREADY_MALLOCED);
+	piece->flags |= TEMP;
+	return piece;
+}
+
+/*
+ * assoc_clear --- throw away every element of symbol[] together with its
+ * hash table (done, for instance, before split() refills an array)
+ */
+
+void
+assoc_clear(symbol)
+NODE *symbol;
+{
+	int slot;
+	NODE *entry, *following;
+
+	if (symbol->var_array == NULL)
+		return;
+
+	for (slot = 0; slot < symbol->array_size; slot++) {
+		entry = symbol->var_array[slot];
+		while (entry != NULL) {
+			/* remember the successor before the entry is released */
+			following = entry->ahnext;
+			unref(entry->ahname);
+			unref(entry->ahvalue);
+			freenode(entry);
+			entry = following;
+		}
+		symbol->var_array[slot] = NULL;
+	}
+
+	free(symbol->var_array);
+	symbol->var_array = NULL;
+	symbol->table_size = 0;
+	symbol->array_size = 0;
+	symbol->flags &= ~ARRAYMAXED;
+}
+
+/*
+ * hash --- calculate the hash function of the string in subs
+ *
+ * s points at the bytes to hash and len is how many of them to use (the
+ * string need not be NUL terminated as far as this routine is concerned).
+ * Each byte c is folded in as h = c + 65599 * h.  The result is reduced
+ * modulo hsize whenever h >= hsize, so the value returned is always less
+ * than hsize.
+ *
+ * hsize must be nonzero: with hsize == 0 the test h >= hsize always
+ * holds and the reduction would divide by zero.
+ *
+ * NOTE(review): bytes are read through a plain `char' pointer, so bytes
+ * above 127 presumably hash differently where char is signed -- confirm
+ * if hash values must agree across platforms.
+ */
+
+unsigned int
+hash(s, len, hsize)
+register const char *s;
+register size_t len;
+unsigned long hsize;
+{
+ register unsigned long h = 0;
+
+ /*
+ * This is INCREDIBLY ugly, but fast. We break the string up into
+ * 8 byte units. On the first time through the loop we get the
+ * "leftover bytes" (strlen % 8). On every other iteration, we
+ * perform 8 HASHC's so we handle all 8 bytes. Essentially, this
+ * saves us 7 cmp & branch instructions. If this routine is
+ * heavily used enough, it's worth the ugly coding.
+ *
+ * OZ's original sdbm hash, copied from Margo Seltzers db package.
+ */
+
+ /*
+ * Even more speed:
+ * #define HASHC h = *s++ + 65599 * h
+ * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
+ *
+ * With htmp = h << 6 (64 * h), htmp << 10 is 65536 * h, so
+ * htmp + (htmp << 10) - h is 65599 * h.  HASHC expands to two
+ * statements and is only used where a statement sequence is valid.
+ */
+#define HASHC htmp = (h << 6); \
+ h = *s++ + htmp + (htmp << 10) - h
+
+ unsigned long htmp;
+
+ h = 0; /* redundant: h was already set to 0 at its declaration */
+
+#if defined(VAXC)
+ /*
+ * This was an implementation of "Duff's Device", but it has been
+ * redone, separating the switch for extra iterations from the
+ * loop. This is necessary because the DEC VAX-C compiler is
+ * STOOPID.
+ */
+ switch (len & (8 - 1)) { /* the len % 8 leftover bytes first */
+ case 7: HASHC;
+ case 6: HASHC;
+ case 5: HASHC;
+ case 4: HASHC;
+ case 3: HASHC;
+ case 2: HASHC;
+ case 1: HASHC;
+ default: break;
+ }
+
+ if (len > (8 - 1)) { /* then len / 8 full groups of eight */
+ register size_t loop = len >> 3;
+ do {
+ HASHC;
+ HASHC;
+ HASHC;
+ HASHC;
+ HASHC;
+ HASHC;
+ HASHC;
+ HASHC;
+ } while (--loop);
+ }
+#else /* ! VAXC */
+ /* "Duff's Device" for those who can handle it */
+ if (len > 0) {
+ register size_t loop = (len + 8 - 1) >> 3; /* passes, counting the partial first one */
+
+ switch (len & (8 - 1)) { /* jump into the loop body to consume len % 8 bytes first */
+ case 0:
+ do { /* All fall throughs */
+ HASHC;
+ case 7: HASHC;
+ case 6: HASHC;
+ case 5: HASHC;
+ case 4: HASHC;
+ case 3: HASHC;
+ case 2: HASHC;
+ case 1: HASHC;
+ } while (--loop);
+ }
+ }
+#endif /* ! VAXC */
+
+ if (h >= hsize) /* reduce into the range 0 .. hsize-1 */
+ h %= hsize;
+ return h;
+}
+
+/*
+ * assoc_find --- walk the chain in slot hash1 of symbol's table looking
+ * for an element whose name compares equal to subs
+ */
+
+static NODE * /* NULL if not found */
+assoc_find(symbol, subs, hash1)
+NODE *symbol;
+register NODE *subs;
+int hash1;
+{
+	register NODE *entry = symbol->var_array[hash1];
+
+	/* stop at the first match or when the chain runs out */
+	while (entry != NULL && cmp_nodes(entry->ahname, subs) != 0)
+		entry = entry->ahnext;
+	return entry;
+}
+
+/*
+ * in_array --- report whether symbol[subs] currently exists
+ *
+ * Returns nonzero when the element is present and 0 otherwise.  The
+ * subscript expression is always evaluated, even when the array has no
+ * table yet.  Using a scalar as the array is a fatal error.
+ */
+
+int
+in_array(symbol, subs)
+NODE *symbol, *subs;
+{
+	register int slot;
+	int found = 0;
+
+	if (symbol->type == Node_param_list)
+		symbol = stack_ptr[symbol->param_cnt];
+	if ((symbol->flags & SCALAR) != 0)
+		fatal("attempt to use scalar as array");
+
+	/* evaluate the subscript before anything else: it may have side effects */
+	subs = concat_exp(subs);	/* a string node comes back */
+
+	if (symbol->var_array != NULL) {
+		slot = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
+		found = (assoc_find(symbol, subs, slot) != NULL);
+	}
+
+	free_temp(subs);
+	return found;
+}
+
+/*
+ * assoc_lookup:
+ * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
+ * isn't there. Returns a pointer ala get_lhs to where its value is stored.
+ *
+ * SYMBOL is the address of the node (or other pointer) being dereferenced.
+ * SUBS is a number or string used as the subscript.
+ *
+ * A symbol with no table yet is turned into a Node_var_array and given an
+ * initial table. Before a new element is linked in, the table is grown if
+ * the element count divided by the table size would exceed AVG_CHAIN_MAX
+ * (unless the table is flagged ARRAYMAXED). Using a scalar as the array
+ * is a fatal error. SUBS is passed to free_temp() on every return path.
+ */
+
+NODE **
+assoc_lookup(symbol, subs)
+NODE *symbol, *subs;
+{
+ register int hash1;
+ register NODE *bucket;
+
+ (void) force_string(subs); /* subs is used through stptr/stlen from here on */
+
+ if ((symbol->flags & SCALAR) != 0)
+ fatal("attempt to use scalar as array");
+
+ if (symbol->var_array == NULL) { /* no table yet: nothing to search */
+ symbol->type = Node_var_array;
+ symbol->array_size = symbol->table_size = 0; /* sanity */
+ symbol->flags &= ~ARRAYMAXED;
+ grow_table(symbol); /* allocates the initial table */
+ hash1 = hash(subs->stptr, subs->stlen,
+ (unsigned long) symbol->array_size);
+ } else {
+ hash1 = hash(subs->stptr, subs->stlen,
+ (unsigned long) symbol->array_size);
+ bucket = assoc_find(symbol, subs, hash1);
+ if (bucket != NULL) { /* already present: hand back its value slot */
+ free_temp(subs);
+ return &(bucket->ahvalue);
+ }
+ }
+
+ /* It's not there, install it. */
+ if (do_lint && subs->stlen == 0)
+ warning("subscript of array `%s' is null string",
+ symbol->vname);
+
+ /* first see if we would need to grow the array, before installing */
+ symbol->table_size++; /* counts the element about to be added */
+ if ((symbol->flags & ARRAYMAXED) == 0
+ && symbol->table_size/symbol->array_size > AVG_CHAIN_MAX) {
+ grow_table(symbol);
+ /* have to recompute hash value for new size */
+ hash1 = hash(subs->stptr, subs->stlen,
+ (unsigned long) symbol->array_size);
+ }
+
+ getnode(bucket);
+ bucket->type = Node_ahash;
+ if (subs->flags & TEMP)
+ bucket->ahname = dupnode(subs);
+ else {
+ unsigned int saveflags = subs->flags;
+
+ subs->flags &= ~MALLOC; /* NOTE(review): presumably makes dupnode() copy subs instead of sharing it -- confirm against dupnode() */
+ bucket->ahname = dupnode(subs);
+ subs->flags = saveflags; /* put the caller's flags back */
+ }
+ free_temp(subs);
+
+ /* array subscripts are strings */
+ bucket->ahname->flags &= ~NUMBER;
+ bucket->ahname->flags |= STRING;
+ bucket->ahvalue = Nnull_string;
+ bucket->ahnext = symbol->var_array[hash1]; /* link in at the head of the chain */
+ symbol->var_array[hash1] = bucket;
+ return &(bucket->ahvalue);
+}
+
+/*
+ * do_delete --- perform `delete array[s]', or `delete array' when tree
+ * is NULL
+ *
+ * symbol is the array variable; a Node_param_list is first resolved
+ * through stack_ptr.  tree is the subscript expression list.  Deleting
+ * from a parameter that is still a plain Node_var, or from an array
+ * that has no table, does nothing.  Any other non-array symbol is a
+ * fatal error.  With lint checking on, deleting a subscript that is not
+ * in the array produces a warning.  When the last element goes, the
+ * table itself is released.
+ */
+
+void
+do_delete(symbol, tree)
+NODE *symbol, *tree;
+{
+	register int hash1;
+	register NODE *bucket, *last;
+	NODE *subs;
+
+	if (symbol->type == Node_param_list) {
+		symbol = stack_ptr[symbol->param_cnt];
+		if (symbol->type == Node_var)
+			return;
+	}
+	if (symbol->type == Node_var_array) {
+		if (symbol->var_array == NULL)
+			return;
+	} else
+		fatal("delete: illegal use of variable `%s' as array",
+			symbol->vname);
+
+	if (tree == NULL) {	/* delete array */
+		assoc_clear(symbol);
+		return;
+	}
+
+	subs = concat_exp(tree);	/* concat_exp returns string node */
+	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
+
+	/* find the element, remembering its predecessor for the unlink */
+	last = NULL;
+	for (bucket = symbol->var_array[hash1]; bucket != NULL;
+			last = bucket, bucket = bucket->ahnext)
+		if (cmp_nodes(bucket->ahname, subs) == 0)
+			break;
+
+	if (bucket == NULL) {
+		if (do_lint)
+			warning("delete: index `%s' not in array `%s'",
+				subs->stptr, symbol->vname);
+		/*
+		 * subs has to stay alive until after the warning, which
+		 * prints subs->stptr; releasing it first would hand
+		 * warning() a pointer into freed storage.
+		 */
+		free_temp(subs);
+		return;
+	}
+	free_temp(subs);	/* not needed once the element is found */
+
+	if (last != NULL)
+		last->ahnext = bucket->ahnext;
+	else
+		symbol->var_array[hash1] = bucket->ahnext;
+	unref(bucket->ahname);
+	unref(bucket->ahvalue);
+	freenode(bucket);
+	symbol->table_size--;
+	if (symbol->table_size <= 0) {
+		/* that was the last element: give the table back */
+		memset(symbol->var_array, '\0',
+			sizeof(NODE *) * symbol->array_size);
+		symbol->table_size = symbol->array_size = 0;
+		symbol->flags &= ~ARRAYMAXED;
+		free((char *) symbol->var_array);
+		symbol->var_array = NULL;
+	}
+}
+
+/*
+ * assoc_scan --- set up lookat for a ``for (iggy in foo)'' loop over
+ * symbol and, if the array has a table, fetch the first element
+ */
+
+void
+assoc_scan(symbol, lookat)
+NODE *symbol;
+struct search *lookat;
+{
+	/* start from a clean search state */
+	lookat->retval = NULL;
+	lookat->bucket = NULL;
+	lookat->idx = 0;
+	lookat->sym = symbol;
+
+	if (symbol->var_array == NULL)
+		return;		/* nothing to scan; retval stays NULL */
+	assoc_next(lookat);
+}
+
+/*
+ * assoc_next --- advance a scan to the next element of the array
+ *
+ * On return lookat->retval is the name node of the next element, or
+ * NULL when the scan is finished.  lookat->bucket holds the rest of the
+ * chain being walked and lookat->idx the next table slot to look at.
+ */
+
+void
+assoc_next(lookat)
+struct search *lookat;
+{
+	register NODE *symbol = lookat->sym;
+
+	if (symbol == NULL)
+		fatal("null symbol in assoc_next");
+
+	if (symbol->var_array == NULL || lookat->idx > symbol->array_size) {
+		lookat->retval = NULL;
+		return;
+	}
+
+	/*
+	 * Strictly speaking this is not safe.  If the loop body deleted
+	 * the element that comes next in this chain, lookat->bucket now
+	 * points at released storage.  That can only be done through an
+	 * array reference, which is unlikely; in short, a program that
+	 * does anything but work on the current element while scanning
+	 * gets no guarantees.  (Doing it safely would mean registering
+	 * every search struct with its array and fixing them all up on
+	 * each delete or insert.)
+	 */
+	if (lookat->bucket != NULL) {
+		/* still inside a chain: hand out its next member */
+		lookat->retval = lookat->bucket->ahname;
+		lookat->bucket = lookat->bucket->ahnext;
+		return;
+	}
+
+	/* otherwise look for the next table slot that has a chain */
+	while (lookat->idx < symbol->array_size) {
+		NODE *head = symbol->var_array[lookat->idx++];
+
+		if (head != NULL) {
+			lookat->retval = head->ahname;
+			lookat->bucket = head->ahnext;
+			return;
+		}
+	}
+
+	/* ran off the end of the table: the scan is over */
+	lookat->retval = NULL;
+	lookat->bucket = NULL;
+}
+
+/*
+ * grow_table --- give symbol a hash table of the next larger size
+ *
+ * Used both to create the first table (symbol->var_array == NULL) and
+ * to enlarge an existing one, in which case every element is rehashed
+ * into the new table and the old table is freed.  If the table is
+ * already at the largest listed size, the symbol is flagged ARRAYMAXED
+ * and nothing else changes.
+ */
+
+static void
+grow_table(symbol)
+NODE *symbol;
+{
+	NODE **oldtab, **newtab, *entry, *following;
+	int i, nsizes;
+	unsigned long slot;
+	unsigned long oldsize, newsize;
+	/*
+	 * Table sizes, all taken from a list of primes.  Early steps are
+	 * roughly a factor of ten apart (not a mere doubling) so that
+	 * growing stays rare; on average it should not happen more than
+	 * twice.  Past 8K the steps are roughly doublings, instead of a
+	 * jump straight from 8K to 64K, and the list stops early on
+	 * MS-DOS, OS/2 and the Atari ST so that their mallocs can cope.
+	 */
+	static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497,
+#if ! defined(MSDOS) && ! defined(OS2) && ! defined(atarist)
+				131101, 262147, 524309, 1048583, 2097169,
+				4194319, 8388617, 16777259, 33554467,
+				67108879, 134217757, 268435459, 536870923,
+				1073741827
+#endif
+	};
+
+	/* choose the first listed size that exceeds the current one */
+	oldsize = symbol->array_size;
+	newsize = oldsize;
+	nsizes = sizeof(sizes)/sizeof(sizes[0]);
+	for (i = 0; i < nsizes; i++) {
+		if (oldsize < sizes[i]) {
+			newsize = sizes[i];
+			break;
+		}
+	}
+
+	if (newsize == oldsize) {	/* no larger size is left (!) */
+		symbol->flags |= ARRAYMAXED;
+		return;
+	}
+
+	/* get a zeroed table of the new size */
+	emalloc(newtab, NODE **, newsize * sizeof(NODE *), "grow_table");
+	memset(newtab, '\0', newsize * sizeof(NODE *));
+
+	oldtab = symbol->var_array;
+	if (oldtab == NULL) {
+		/* first table for this array: it starts out empty */
+		symbol->table_size = 0;
+	} else {
+		/* rehash every element into the new table, then drop the old one */
+		for (i = 0; i < oldsize; i++) {
+			for (entry = oldtab[i]; entry != NULL; entry = following) {
+				following = entry->ahnext;
+				slot = hash(entry->ahname->stptr,
+						entry->ahname->stlen, newsize);
+
+				/* unhook from the old chain, push onto the new one */
+				entry->ahnext = newtab[slot];
+				newtab[slot] = entry;
+			}
+		}
+		free(oldtab);
+	}
+
+	/*
+	 * symbol->table_size is left alone when an existing table was
+	 * moved, and was set to 0 above for a brand new one.
+	 */
+	symbol->var_array = newtab;
+	symbol->array_size = newsize;
+}
diff --git a/contrib/awk/awk.h b/contrib/awk/awk.h
new file mode 100644
index 0000000..630144d
--- /dev/null
+++ b/contrib/awk/awk.h
@@ -0,0 +1,882 @@
+/*
+ * awk.h -- Definitions for gawk.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* ------------------------------ Includes ------------------------------ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1 /* enable GNU extensions */
+#endif /* _GNU_SOURCE */
+
+#include <stdio.h>
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif /* HAVE_LIMITS_H */
+#include <ctype.h>
+#include <setjmp.h>
+#ifdef HAVE_LOCALE_H
+#include <locale.h>
+#endif /* HAVE_LOCALE_H */
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+#include <signal.h>
+#include <time.h>
+#include <errno.h>
+#if ! defined(errno) && ! defined(MSDOS) && ! defined(OS2)
+extern int errno;
+#endif
+#ifdef HAVE_SIGNUM_H
+#include <signum.h>
+#endif
+
+/* ----------------- System dependencies (with more includes) -----------*/
+
+/* This section is the messiest one in the file, not a lot that can be done */
+
+/* First, get the ctype stuff right; from Jim Meyering */
+#if defined(STDC_HEADERS) || (!defined(isascii) && !defined(HAVE_ISASCII))
+#define ISASCII(c) 1
+#else
+#define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank
+#define ISBLANK(c) (ISASCII(c) && isblank(c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (ISASCII(c) && isgraph(c))
+#else
+#define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
+#endif
+
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+
+#ifdef __STDC__
+#define P(s) s
+#define MALLOC_ARG_T size_t
+#else /* not __STDC__ */
+#define P(s) ()
+#define MALLOC_ARG_T unsigned
+#define volatile
+#define const
+#endif /* not __STDC__ */
+
+#if ! defined(VMS) || (! defined(VAXC) && ! defined(__DECC))
+#include <sys/types.h>
+#include <sys/stat.h>
+#else /* VMS w/ VAXC or DECC */
+#include <types.h>
+#include <stat.h>
+#include <file.h> /* avoid <fcntl.h> in io.c */
+#ifdef __DECC
+/* DEC C implies DECC$SHR, which doesn't have the %g problem of VAXCRTL */
+#undef GFMT_WORKAROUND
+#endif
+#endif /* VMS w/ VAXC or DECC */
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else /* not STDC_HEADERS */
+#include "protos.h"
+#endif /* not STDC_HEADERS */
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#ifdef NEED_MEMORY_H
+#include <memory.h>
+#endif /* NEED_MEMORY_H */
+#else /* not HAVE_STRING_H */
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif /* HAVE_STRINGS_H */
+#endif /* not HAVE_STRING_H */
+
+#ifdef NeXT
+#if __GNUC__ < 2 || __GNUC_MINOR__ < 7
+#include <libc.h>
+#endif
+#undef atof
+#define getopt GNU_getopt
+#define GFMT_WORKAROUND
+#endif /* NeXT */
+
+#if defined(atarist) || defined(VMS)
+#include <unixlib.h>
+#endif /* atarist || VMS */
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+#ifndef HAVE_VPRINTF
+/* if you don't have vprintf, try this and cross your fingers. */
+#ifdef HAVE_DOPRNT
+#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp))
+#else /* not HAVE_DOPRNT */
+you
+lose
+#endif /* not HAVE_DOPRNT */
+#endif /* HAVE_VPRINTF */
+
+#ifndef HAVE_SETLOCALE
+#define setlocale(locale, val) /* nothing */
+#endif /* HAVE_SETLOCALE */
+
+#ifdef VMS
+#include "vms/redirect.h"
+#endif /*VMS*/
+
+#ifdef atarist
+#include "atari/redirect.h"
+#endif
+
+#define GNU_REGEX
+#ifdef GNU_REGEX
+#include "regex.h"
+#include "dfa.h"
+typedef struct Regexp {
+ struct re_pattern_buffer pat;
+ struct re_registers regs;
+ struct dfa dfareg;
+ int dfa;
+} Regexp;
+#define RESTART(rp,s) (rp)->regs.start[0]
+#define REEND(rp,s) (rp)->regs.end[0]
+#define SUBPATSTART(rp,s,n) (rp)->regs.start[n]
+#define SUBPATEND(rp,s,n) (rp)->regs.end[n]
+#endif /* GNU_REGEX */
+
+/* ------------------ Constants, Structures, Typedefs ------------------ */
+
+#ifndef AWKNUM
+#define AWKNUM double
+#endif
+
+#ifndef TRUE
+/* a bit hackneyed, but what the heck */
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/* Figure out what '\a' really is. */
+#ifdef __STDC__
+#define BELL '\a' /* sure makes life easy, don't it? */
+#else
+# if 'z' - 'a' == 25 /* ascii */
+# if 'a' != 97 /* machine is dumb enough to use mark parity */
+# define BELL '\207'
+# else
+# define BELL '\07'
+# endif
+# else
+# define BELL '\057'
+# endif
+#endif
+
+typedef enum nodevals {
+ /* illegal entry == 0 */
+ Node_illegal,
+
+ /* binary operators: lnode and rnode are the expressions to work on */
+ Node_times,
+ Node_quotient,
+ Node_mod,
+ Node_plus,
+ Node_minus,
+ Node_cond_pair, /* conditional pair (see Node_line_range) */
+ Node_subscript,
+ Node_concat,
+ Node_exp,
+
+ /* unary operators: subnode is the expression to work on */
+ Node_preincrement,
+ Node_predecrement,
+ Node_postincrement,
+ Node_postdecrement,
+ Node_unary_minus,
+ Node_field_spec,
+
+ /* assignments: lnode is the var to assign to, rnode is the exp */
+ Node_assign,
+ Node_assign_times,
+ Node_assign_quotient,
+ Node_assign_mod,
+ Node_assign_plus,
+ Node_assign_minus,
+ Node_assign_exp,
+
+ /* boolean binaries: lnode and rnode are expressions */
+ Node_and,
+ Node_or,
+
+ /* binary relationals: compare lnode and rnode */
+ Node_equal,
+ Node_notequal,
+ Node_less,
+ Node_greater,
+ Node_leq,
+ Node_geq,
+ Node_match,
+ Node_nomatch,
+
+ /* unary relationals: work on subnode */
+ Node_not,
+
+ /* program structures */
+ Node_rule_list, /* lnode is a rule, rnode is rest of list */
+ Node_rule_node, /* lnode is pattern, rnode is statement */
+ Node_statement_list, /* lnode is statement, rnode is more list */
+ Node_if_branches, /* lnode is to run on true, rnode on false */
+ Node_expression_list, /* lnode is an exp, rnode is more list */
+ Node_param_list, /* lnode is a variable, rnode is more list */
+
+ /* keywords */
+ Node_K_if, /* lnode is conditional, rnode is if_branches */
+ Node_K_while, /* lnode is conditional, rnode is stuff to run */
+ Node_K_for, /* lnode is for_struct, rnode is stuff to run */
+ Node_K_arrayfor, /* lnode is for_struct, rnode is stuff to run */
+ Node_K_break, /* no subs */
+ Node_K_continue, /* no subs */
+ Node_K_print, /* lnode is exp_list, rnode is redirect */
+ Node_K_printf, /* lnode is exp_list, rnode is redirect */
+ Node_K_next, /* no subs */
+ Node_K_exit, /* subnode is return value, or NULL */
+ Node_K_do, /* lnode is conditional, rnode stuff to run */
+ Node_K_return, /* lnode is return value */
+ Node_K_delete, /* lnode is array, rnode is subscript */
+ Node_K_getline, /* lnode is opt var, rnode is redirection */
+ Node_K_function, /* lnode is statement list, rnode is params */
+ Node_K_nextfile, /* no subs */
+
+ /* I/O redirection for print statements */
+ Node_redirect_output, /* subnode is where to redirect */
+ Node_redirect_append, /* subnode is where to redirect */
+ Node_redirect_pipe, /* subnode is where to redirect */
+ Node_redirect_pipein, /* subnode is where to redirect */
+ Node_redirect_input, /* subnode is where to redirect */
+
+ /* Variables */
+ Node_var, /* lnode is value (see var_value); r slot is array stuff */
+ Node_var_array, /* array is ptr to elements, asize num of eles */
+ Node_val, /* node is a value - type in flags */
+
+ /* Builtins: subnode is explist to work on, proc is func to call */
+ Node_builtin,
+
+ /*
+ * pattern: conditional ',' conditional ; lnode of Node_line_range
+ * is the two conditionals (Node_cond_pair), other word (rnode place)
+ * is a flag indicating whether or not this range has been entered.
+ */
+ Node_line_range,
+
+ /*
+ * boolean test of membership in array: lnode is string-valued
+ * expression, rnode is array name
+ */
+ Node_in_array,
+
+ Node_func, /* lnode is param. list, rnode is body */
+ Node_func_call, /* lnode is name, rnode is argument list */
+
+ Node_cond_exp, /* lnode is conditional, rnode is if_branches */
+ Node_regex, /* a regexp, text, compiled, flags, etc */
+ Node_hashnode, /* an identifier in the symbol table */
+ Node_ahash, /* an array element */
+ Node_NF, /* variables recognized in the grammar */
+ Node_NR,
+ Node_FNR,
+ Node_FS,
+ Node_RS,
+ Node_FIELDWIDTHS,
+ Node_IGNORECASE,
+ Node_OFS,
+ Node_ORS,
+ Node_OFMT,
+ Node_CONVFMT,
+ Node_final /* sentry value, not legal */
+} NODETYPE;
+
+/*
+ * NOTE - this struct is rather kludgey -- it is packed to minimize
+ * space usage, at the expense of cleanliness. Alter at own risk.
+ */
+typedef struct exp_node { /* accessor macros for these fields are defined after this struct */
+ union {
+ struct {
+ union {
+ struct exp_node *lptr; /* accessed as lnode, nextp, subnode, re_text, forsub, var_value, condpair */
+ char *param_name; /* accessed as param */
+ long ll; /* accessed as array_size */
+ } l;
+ union {
+ struct exp_node *rptr; /* accessed as rnode */
+ struct exp_node *(*pptr)(); /* accessed as proc */
+ Regexp *preg; /* accessed as re_reg */
+ struct for_loop_header *hd; /* accessed as forloop (reached through rnode) */
+ struct exp_node **av; /* accessed as var_array */
+ int r_ent; /* range entered; accessed as triggered */
+ } r;
+ union {
+ struct exp_node *extra; /* accessed as re_exp */
+ long xl; /* accessed as table_size */
+ } x;
+ char *name; /* accessed as source_file */
+ short number; /* accessed as source_line, param_cnt, re_cnt */
+ unsigned char reflags; /* accessed as re_flags */
+# define CASE 1
+# define CONST 2
+# define FS_DFLT 4
+ } nodep;
+ struct {
+ AWKNUM fltnum; /* accessed as numbr; this is here for optimal packing of
+ * the structure on many machines
+ */
+ char *sp; /* accessed as stptr */
+ size_t slen; /* accessed as stlen */
+ long sref; /* accessed as stref */
+ int idx; /* accessed as stfmt */
+ } val;
+ struct {
+ struct exp_node *next;
+ char *name;
+ size_t length;
+ struct exp_node *value;
+ } hash;
+#define hnext sub.hash.next
+#define hname sub.hash.name
+#define hlength sub.hash.length
+#define hvalue sub.hash.value
+ struct {
+ struct exp_node *next;
+ struct exp_node *name;
+ struct exp_node *value;
+ } ahash;
+#define ahnext sub.ahash.next
+#define ahname sub.ahash.name
+#define ahvalue sub.ahash.value
+ } sub;
+ NODETYPE type;
+ unsigned short flags; /* OR of the bit values defined below */
+# define MALLOC 1 /* can be free'd */
+# define TEMP 2 /* should be free'd */
+# define PERM 4 /* can't be free'd */
+# define STRING 8 /* assigned as string */
+# define STR 16 /* string value is current */
+# define NUM 32 /* numeric value is current */
+# define NUMBER 64 /* assigned as number */
+# define MAYBE_NUM 128 /* user input: if NUMERIC then
+ * a NUMBER */
+# define ARRAYMAXED 256 /* array is at max size */
+# define SCALAR 512 /* used as scalar, can't be array */
+# define FUNC 1024 /* this parameter is really a
+ * function name; see awk.y */
+# define FIELD 2048 /* this is a field */
+
+ char *vname; /* variable's name */
+} NODE;
+
+#define lnode sub.nodep.l.lptr
+#define nextp sub.nodep.l.lptr
+#define rnode sub.nodep.r.rptr
+#define source_file sub.nodep.name
+#define source_line sub.nodep.number
+#define param_cnt sub.nodep.number
+#define param sub.nodep.l.param_name
+
+#define subnode lnode
+#define proc sub.nodep.r.pptr
+
+#define re_reg sub.nodep.r.preg
+#define re_flags sub.nodep.reflags
+#define re_text lnode
+#define re_exp sub.nodep.x.extra
+#define re_cnt sub.nodep.number
+
+#define forsub lnode
+#define forloop rnode->sub.nodep.r.hd
+
+#define stptr sub.val.sp
+#define stlen sub.val.slen
+#define stref sub.val.sref
+#define stfmt sub.val.idx
+
+#define numbr sub.val.fltnum
+
+#define var_value lnode
+#define var_array sub.nodep.r.av
+#define array_size sub.nodep.l.ll
+#define table_size sub.nodep.x.xl
+
+#define condpair lnode
+#define triggered sub.nodep.r.r_ent
+
+/* a regular for loop */
+typedef struct for_loop_header {
+ NODE *init;
+ NODE *cond;
+ NODE *incr;
+} FOR_LOOP_HEADER;
+
+/* for "for(iggy in foo) {" */
+struct search {
+ NODE *sym;
+ size_t idx;
+ NODE *bucket;
+ NODE *retval;
+};
+
+/* for faster input, bypass stdio */
+typedef struct iobuf {
+ const char *name;
+ int fd;
+ char *buf;
+ char *off;
+ char *end;
+ size_t size; /* this will be determined by an fstat() call */
+ int cnt;
+ long secsiz;
+ int flag;
+# define IOP_IS_TTY 1
+# define IOP_IS_INTERNAL 2
+# define IOP_NO_FREE 4
+# define IOP_MMAPPED 8
+# define IOP_NOFREE_OBJ 16
+ int (*getrec)();
+} IOBUF;
+
+typedef void (*Func_ptr)();
+
+/* structure used to dynamically maintain a linked-list of open files/pipes */
+struct redirect {
+ unsigned int flag;
+# define RED_FILE 1
+# define RED_PIPE 2
+# define RED_READ 4
+# define RED_WRITE 8
+# define RED_APPEND 16
+# define RED_NOBUF 32
+# define RED_USED 64 /* closed temporarily to reuse fd */
+# define RED_EOF 128
+ char *value;
+ FILE *fp;
+ FILE *ifp; /* input fp, needed for PIPES_SIMULATED */
+ IOBUF *iop;
+ int pid;
+ int status;
+ struct redirect *prev;
+ struct redirect *next;
+};
+
+/* structure for our source, either a command line string or a source file */
+struct src {
+ enum srctype { CMDLINE = 1, SOURCEFILE } stype;
+ char *val;
+};
+
+/* longjmp return codes, must be nonzero */
+/* Continue means either for loop/while continue, or next input record */
+#define TAG_CONTINUE 1
+/* Break means either for/while break, or stop reading input */
+#define TAG_BREAK 2
+/* Return means return from a function call; leave value in ret_node */
+#define TAG_RETURN 3
+
+#ifndef LONG_MAX
+#define LONG_MAX ((long)(~(1L << (sizeof (long) * 8 - 1))))
+#endif
+#ifndef ULONG_MAX
+#define ULONG_MAX (~(unsigned long)0)
+#endif
+#ifndef LONG_MIN
+#define LONG_MIN ((long)(-LONG_MAX - 1L))
+#endif
+#define HUGE LONG_MAX
+
+/* -------------------------- External variables -------------------------- */
+/* gawk builtin variables */
+extern long NF;
+extern long NR;
+extern long FNR;
+extern int IGNORECASE;
+extern int RS_is_null;
+extern char *OFS;
+extern int OFSlen;
+extern char *ORS;
+extern int ORSlen;
+extern char *OFMT;
+extern char *CONVFMT;
+extern int CONVFMTidx;
+extern int OFMTidx;
+extern NODE *CONVFMT_node, *FIELDWIDTHS_node, *FILENAME_node;
+extern NODE *FNR_node, *FS_node, *IGNORECASE_node, *NF_node;
+extern NODE *NR_node, *OFMT_node, *OFS_node, *ORS_node, *RLENGTH_node;
+extern NODE *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
+extern NODE **stack_ptr;
+extern NODE *Nnull_string;
+extern NODE **fields_arr;
+extern int sourceline;
+extern char *source;
+extern NODE *expression_value;
+
+#if __GNUC__ < 2
+extern NODE *_t; /* used as temporary in tree_eval */
+#endif
+
+extern NODE *nextfree;
+extern int field0_valid;
+extern int do_traditional;
+extern int do_posix;
+extern int do_lint;
+extern int do_lint_old;
+extern int do_intervals;
+extern int in_begin_rule;
+extern int in_end_rule;
+
+extern const char *myname;
+
+extern char quote;
+extern char *defpath;
+extern char envsep;
+
+extern char casetable[]; /* for case-independent regexp matching */
+
+/* ------------------------- Pseudo-functions ------------------------- */
+
+#define is_identchar(c) (isalnum(c) || (c) == '_')
+#define isnondecimal(str) (((str)[0]) == '0')
+
+#ifdef MPROF
+#define getnode(n) emalloc(n, NODE *, sizeof(NODE), "getnode")
+#define freenode(n) free(n)
+#else /* not MPROF */
+#define getnode(n) ((void) (nextfree ? ((n) = nextfree, nextfree = nextfree->nextp) \
+ : ((n) = more_nodes()))) /* pop free list, else more_nodes(); one expression, safe in unbraced if/else */
+#define freenode(n) ((n)->flags &= ~SCALAR, (n)->nextp = nextfree, nextfree = (n))
+#endif /* not MPROF */
+
+#ifdef DEBUG
+#undef freenode
+#define get_lhs(p, a) r_get_lhs((p), (a))
+#define m_tree_eval(t, iscond) r_tree_eval(t, iscond)
+#else
+#define get_lhs(p, a) ((p)->type == Node_var ? (&(p)->var_value) : \
+ r_get_lhs((p), (a)))
+#if __GNUC__ >= 2
+#define m_tree_eval(t, iscond) \
+ ({NODE * _t = (t); \
+ if (_t == NULL) \
+ _t = Nnull_string; \
+ else { \
+ switch(_t->type) { \
+ case Node_val: \
+ break; \
+ case Node_var: \
+ _t = _t->var_value; \
+ break; \
+ default: \
+ _t = r_tree_eval(_t, iscond);\
+ break; \
+ } \
+ } \
+ _t;})
+#else
+#define m_tree_eval(t, iscond) (_t = (t), _t == NULL ? Nnull_string : \
+ (_t->type == Node_param_list ? \
+ r_tree_eval(_t, iscond) : \
+ (_t->type == Node_val ? _t : \
+ (_t->type == Node_var ? _t->var_value : \
+ r_tree_eval(_t, iscond)))))
+#endif /* __GNUC__ */
+#endif /* not DEBUG */
+#define tree_eval(t) m_tree_eval(t, FALSE)
+
+#define make_number(x) mk_number((x), (unsigned int)(MALLOC|NUM|NUMBER))
+#define tmp_number(x) mk_number((x), (unsigned int)(MALLOC|TEMP|NUM|NUMBER))
+
+#define free_temp(n) do { if ((n)->flags&TEMP) { unref(n); }} while (FALSE)
+#define make_string(s, l) make_str_node((s), (size_t) (l), FALSE)
+#define SCAN 1
+#define ALREADY_MALLOCED 2
+
+#define cant_happen() r_fatal("internal error line %d, file: %s", \
+ __LINE__, __FILE__) /* reports an internal error; no trailing ';' here, the caller supplies it */
+
+#ifdef HAVE_STRINGIZE
+#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno)),0))
+#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\
+ (MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno)),0))
+#else /* HAVE_STRINGIZE */
+#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), "var", strerror(errno)),0))
+#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\
+ (MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), "var", strerror(errno)),0))
+#endif /* HAVE_STRINGIZE */
+
+#ifdef DEBUG
+#define force_number r_force_number
+#define force_string r_force_string
+#else /* not DEBUG */
+#ifdef lint
+extern AWKNUM force_number();
+#endif
+#if __GNUC__ >= 2
+#define force_number(n) ({NODE *_tn = (n);\
+ (_tn->flags & NUM) ?_tn->numbr : r_force_number(_tn);})
+#define force_string(s) ({NODE *_ts = (s);\
+ ((_ts->flags & STR) && \
+ (_ts->stfmt == -1 || _ts->stfmt == CONVFMTidx)) ?\
+ _ts : r_force_string(_ts);})
+#else
+#ifdef MSDOS
+extern double _msc51bug;
+#define force_number(n) (_msc51bug=(_t = (n),\
+ (_t->flags & NUM) ? _t->numbr : r_force_number(_t)))
+#else /* not MSDOS */
+#define force_number(n) (_t = (n),\
+ (_t->flags & NUM) ? _t->numbr : r_force_number(_t))
+#endif /* not MSDOS */
+#define force_string(s) (_t = (s),((_t->flags & STR) && \
+ (_t->stfmt == -1 || \
+ _t->stfmt == CONVFMTidx))? \
+ _t : r_force_string(_t))
+#endif /* not __GNUC__ */
+#endif /* not DEBUG */
+
+#define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0)
+#define STREQN(a,b,n) ((n) && *(a)== *(b) && \
+ strncmp((a), (b), (size_t) (n)) == 0)
+
+#define fatal set_loc(__FILE__, __LINE__), r_fatal
+
+/* ------------- Function prototypes or defs (as appropriate) ------------- */
+
+/* array.c */
+extern NODE *concat_exp P((NODE *tree));
+extern void assoc_clear P((NODE *symbol));
+extern unsigned int hash P((const char *s, size_t len, unsigned long hsize));
+extern int in_array P((NODE *symbol, NODE *subs));
+extern NODE **assoc_lookup P((NODE *symbol, NODE *subs));
+extern void do_delete P((NODE *symbol, NODE *tree));
+extern void assoc_scan P((NODE *symbol, struct search *lookat));
+extern void assoc_next P((struct search *lookat));
+/* awktab.c */
+extern char *tokexpand P((void));
+extern char nextc P((void));
+extern NODE *node P((NODE *left, NODETYPE op, NODE *right));
+extern NODE *install P((char *name, NODE *value));
+extern NODE *lookup P((const char *name));
+extern NODE *variable P((char *name, int can_free, NODETYPE type));
+extern int yyparse P((void));
+/* builtin.c */
+extern double double_to_int P((double d));
+extern NODE *do_exp P((NODE *tree));
+extern NODE *do_fflush P((NODE *tree));
+extern NODE *do_index P((NODE *tree));
+extern NODE *do_int P((NODE *tree));
+extern NODE *do_length P((NODE *tree));
+extern NODE *do_log P((NODE *tree));
+extern NODE *do_sprintf P((NODE *tree));
+extern void do_printf P((NODE *tree));
+extern void print_simple P((NODE *tree, FILE *fp));
+extern NODE *do_sqrt P((NODE *tree));
+extern NODE *do_substr P((NODE *tree));
+extern NODE *do_strftime P((NODE *tree));
+extern NODE *do_systime P((NODE *tree));
+extern NODE *do_system P((NODE *tree));
+extern void do_print P((NODE *tree));
+extern NODE *do_tolower P((NODE *tree));
+extern NODE *do_toupper P((NODE *tree));
+extern NODE *do_atan2 P((NODE *tree));
+extern NODE *do_sin P((NODE *tree));
+extern NODE *do_cos P((NODE *tree));
+extern NODE *do_rand P((NODE *tree));
+extern NODE *do_srand P((NODE *tree));
+extern NODE *do_match P((NODE *tree));
+extern NODE *do_gsub P((NODE *tree));
+extern NODE *do_sub P((NODE *tree));
+extern NODE *do_gensub P((NODE *tree));
+#ifdef BITOPS
+extern NODE *do_lshift P((NODE *tree));
+extern NODE *do_rshift P((NODE *tree));
+extern NODE *do_and P((NODE *tree));
+extern NODE *do_or P((NODE *tree));
+extern NODE *do_xor P((NODE *tree));
+extern NODE *do_compl P((NODE *tree));
+extern NODE *do_strtonum P((NODE *tree));
+#endif /* BITOPS */
+#if defined(BITOPS) || defined(NONDECDATA)
+extern AWKNUM nondec2awknum P((char *str, size_t len));
+#endif /* defined(BITOPS) || defined(NONDECDATA) */
+/* eval.c */
+extern int interpret P((NODE *volatile tree));
+extern NODE *r_tree_eval P((NODE *tree, int iscond));
+extern int cmp_nodes P((NODE *t1, NODE *t2));
+extern NODE **r_get_lhs P((NODE *ptr, Func_ptr *assign));
+extern void set_IGNORECASE P((void));
+void set_OFS P((void));
+void set_ORS P((void));
+void set_OFMT P((void));
+void set_CONVFMT P((void));
+/* field.c */
+extern void init_fields P((void));
+extern void set_record P((char *buf, int cnt, int freeold));
+extern void reset_record P((void));
+extern void set_NF P((void));
+extern NODE **get_field P((long num, Func_ptr *assign));
+extern NODE *do_split P((NODE *tree));
+extern void set_FS P((void));
+extern void set_FS_if_not_FIELDWIDTHS P((void));
+extern void set_RS P((void));
+extern void set_FIELDWIDTHS P((void));
+extern int using_fieldwidths P((void));
+/* gawkmisc.c */
+extern char *gawk_name P((const char *filespec));
+extern void os_arg_fixup P((int *argcp, char ***argvp));
+extern int os_devopen P((const char *name, int flag));
+extern int optimal_bufsize P((int fd, struct stat *sbuf));
+extern int ispath P((const char *file));
+extern int isdirpunct P((int c));
+/* io.c */
+extern void set_FNR P((void));
+extern void set_NR P((void));
+extern void do_input P((void));
+extern struct redirect *redirect P((NODE *tree, int *errflg));
+extern NODE *do_close P((NODE *tree));
+extern int flush_io P((void));
+extern int close_io P((void));
+extern int devopen P((const char *name, const char *mode));
+extern int pathopen P((const char *file));
+extern NODE *do_getline P((NODE *tree));
+extern void do_nextfile P((void));
+extern struct redirect *getredirect P((char *str, int len));
+/* main.c */
+extern int main P((int argc, char **argv));
+extern void load_environ P((void));
+extern char *arg_assign P((char *arg));
+extern RETSIGTYPE catchsig P((int sig, int code));
+/* msg.c */
+extern void err P((const char *s, const char *emsg, va_list argp));
+#if _MSC_VER == 510
+extern void msg P((va_list va_alist, ...));
+extern void error P((va_list va_alist, ...));
+extern void warning P((va_list va_alist, ...));
+extern void set_loc P((char *file, int line));
+extern void r_fatal P((va_list va_alist, ...));
+#else
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+extern void msg (char *mesg, ...);
+extern void error (char *mesg, ...);
+extern void warning (char *mesg, ...);
+extern void set_loc (char *file, int line);
+extern void r_fatal (char *mesg, ...);
+#else
+extern void msg ();
+extern void error ();
+extern void warning ();
+extern void set_loc ();
+extern void r_fatal ();
+#endif
+#endif
+/* node.c */
+extern AWKNUM r_force_number P((NODE *n));
+extern NODE *format_val P((char *format, int index, NODE *s));
+extern NODE *r_force_string P((NODE *s));
+extern NODE *dupnode P((NODE *n));
+extern NODE *mk_number P((AWKNUM x, unsigned int flags));
+extern NODE *make_str_node P((char *s, size_t len, int scan ));
+extern NODE *tmp_string P((char *s, size_t len ));
+extern NODE *more_nodes P((void));
+#ifdef DEBUG
+extern void freenode P((NODE *it));
+#endif
+extern void unref P((NODE *tmp));
+extern int parse_escape P((char **string_ptr));
+/* re.c */
+extern Regexp *make_regexp P((char *s, size_t len, int ignorecase, int dfa));
+extern int research P((Regexp *rp, char *str, int start,
+ size_t len, int need_start));
+extern void refree P((Regexp *rp));
+extern void reg_error P((const char *s));
+extern Regexp *re_update P((NODE *t));
+extern void resyntax P((int syntax));
+extern void resetup P((void));
+extern int avoid_dfa P((NODE *re, char *str, size_t len)); /* temporary */
+
+/* strncasecmp.c */
+extern int strncasecmp P((const char *s1, const char *s2, register size_t n));
+
+#if defined(atarist)
+#if defined(PIPES_SIMULATED)
+/* atari/tmpnam.c */
+extern char *tmpnam P((char *buf));
+extern char *tempnam P((const char *path, const char *base));
+#else
+#include <wait.h>
+#endif
+#include <fcntl.h>
+#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
+#else
+#define INVALID_HANDLE (-1)
+#endif /* atarist */
+
+#ifndef STATIC
+#define STATIC static
+#endif
+
+#ifdef C_ALLOCA
+/* The __hpux check is to avoid conflicts with bison's definition of
+ alloca() in awktab.c.*/
+#if (defined(__STDC__) && __STDC__) || defined (__hpux)
+extern void *alloca P((unsigned));
+#else
+extern char *alloca P((unsigned));
+#endif
+#endif
diff --git a/contrib/awk/awk.y b/contrib/awk/awk.y
new file mode 100644
index 0000000..1b9a89b
--- /dev/null
+++ b/contrib/awk/awk.y
@@ -0,0 +1,2434 @@
+/*
+ * awk.y --- yacc/bison parser
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+%{
+#ifdef DEBUG
+#define YYDEBUG 12
+#endif
+
+#include "awk.h"
+
+#define CAN_FREE TRUE /* passed as the second argument of variable() by the grammar actions */
+#define DONT_FREE FALSE
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+static void yyerror(const char *m, ...) ;
+#else
+static void yyerror(); /* va_alist */
+#endif
+static char *get_src_buf P((void));
+static int yylex P((void));
+static NODE *node_common P((NODETYPE op));
+static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
+static NODE *mkrangenode P((NODE *cpair));
+static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
+static NODE *append_right P((NODE *list, NODE *new));
+static void func_install P((NODE *params, NODE *def));
+static void pop_var P((NODE *np, int freeit));
+static void pop_params P((NODE *params));
+static NODE *make_param P((char *name));
+static NODE *mk_rexp P((NODE *exp));
+static int dup_parms P((NODE *func));
+static void param_sanity P((NODE *arglist));
+static int isnoeffect P((NODETYPE t));
+static int isassignable P((NODE *n));
+
+enum defref { FUNC_DEFINE, FUNC_USE };
+static void func_use P((char *name, enum defref how));
+static void check_funcs P((void));
+
+static int want_assign; /* lexical scanning kludge; the grammar clears it at newlines, `;' and after ASSIGNOP */
+static int want_regexp; /* lexical scanning kludge; set by the regexp rule so yylex reads a regexp constant next */
+static int can_return; /* TRUE between a function prologue and the end of its body; checked when `return' is parsed */
+static int io_allowed = TRUE; /* FALSE while a BEGIN or END action is being parsed */
+static char *lexptr; /* pointer to next char during parsing */
+static char *lexend; /* one past the last available source character; nextc() refills at this point */
+static char *lexptr_begin; /* keep track of where we were for error msgs */
+static char *lexeme; /* beginning of lexeme for debugging */
+static char *thisline = NULL; /* start of the source line shown by yyerror; reset to NULL by yylex */
+#define YYDEBUG_LEXER_TEXT (lexeme)
+static int param_counter; /* reset to 0 at the start of each function definition */
+static char *tokstart = NULL; /* start of the token text buffer */
+static char *tok = NULL; /* next free position in the token text buffer */
+static char *tokend; /* end of the token text buffer; tokadd() grows the buffer on reaching it */
+
+#define HASHSIZE 1021 /* this constant only used here */
+NODE *variables[HASHSIZE];
+
+extern char *source;
+extern int sourceline;
+extern struct src *srcfiles;
+extern int numfiles;
+extern int errcount;
+extern NODE *begin_block;
+extern NODE *end_block;
+%}
+
+%union {
+ long lval; /* <lval> symbols, e.g. LEX_BUILTIN, LEX_LENGTH, INCREMENT */
+ AWKNUM fval;
+ NODE *nodeval; /* parse tree nodes; the value type of most nonterminals */
+ NODETYPE nodetypeval; /* <nodetypeval> tokens: keywords and operators */
+ char *sval; /* <sval> tokens: NAME, FUNC_CALL, REGEXP text */
+ NODE *(*ptrval)();
+}
+
+%type <nodeval> function_prologue function_body
+%type <nodeval> rexp exp start program rule simp_exp
+%type <nodeval> non_post_simp_exp
+%type <nodeval> pattern
+%type <nodeval> action variable param_list
+%type <nodeval> rexpression_list opt_rexpression_list
+%type <nodeval> expression_list opt_expression_list
+%type <nodeval> statements statement if_statement opt_param_list
+%type <nodeval> opt_exp opt_variable regexp
+%type <nodeval> input_redir output_redir
+%type <nodetypeval> print
+%type <sval> func_name
+%type <lval> lex_builtin
+
+%token <sval> FUNC_CALL NAME REGEXP
+%token <lval> ERROR
+%token <nodeval> YNUMBER YSTRING
+%token <nodetypeval> RELOP APPEND_OP
+%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP
+%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
+%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
+%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
+%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
+%token <nodetypeval> LEX_IN
+%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
+%token <lval> LEX_BUILTIN LEX_LENGTH
+
+/* these are just yylval numbers */
+
+/*
+ * Operator precedence and associativity.
+ * Lowest to highest
+ */
+%right ASSIGNOP
+%right '?' ':'
+%left LEX_OR
+%left LEX_AND
+%left LEX_GETLINE
+%nonassoc LEX_IN
+%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
+%nonassoc ','
+%nonassoc MATCHOP
+%nonassoc RELOP '<' '>' '|' APPEND_OP
+%left CONCAT_OP
+%left YSTRING YNUMBER
+%left '+' '-'
+%left '*' '/' '%'
+%right '!' UNARY
+%right '^'
+%left INCREMENT DECREMENT
+%left '$'
+%left '(' ')'
+%%
+
+start /* start symbol: a whole awk program, optionally surrounded by newlines */
+ : opt_nls program opt_nls
+ {
+ expression_value = $2; /* save the parsed rule list */
+ check_funcs(); /* called once, after the entire program has been parsed */
+ }
+ ;
+
+program /* list of rules; the value is NULL, a single rule, or a chain of Node_rule_list cells */
+ : rule
+ {
+ if ($1 != NULL)
+ $$ = $1;
+ else
+ $$ = NULL;
+ yyerrok;
+ }
+ | program rule
+ /*
+ * add the rule to the tail of list; a rule whose value is NULL
+ * contributes nothing and the list is passed through unchanged
+ */
+ {
+ if ($2 == NULL)
+ $$ = $1;
+ else if ($1 == NULL)
+ $$ = $2;
+ else {
+ if ($1->type != Node_rule_list) /* lone first rule: wrap it in a list cell first */
+ $1 = node($1, Node_rule_list,
+ (NODE*) NULL);
+ $$ = append_right($1,
+ node($2, Node_rule_list, (NODE *) NULL));
+ }
+ yyerrok;
+ }
+ | error { $$ = NULL; }
+ | program error { $$ = NULL; } /* the list built so far is not carried into $$ */
+ | /* empty */ { $$ = NULL; }
+ ;
+
+/*
+ * rule --- one top-level program item: a BEGIN or END block, a pattern
+ * and/or action, or a function definition.
+ *
+ * BEGIN and END actions are chained onto begin_block / end_block, and
+ * function definitions are handed to func_install(); those alternatives
+ * yield NULL so that `program' skips them.  Only pattern/action rules
+ * yield a Node_rule_node.  io_allowed is FALSE while a BEGIN or END
+ * action is being parsed.
+ */
+rule
+    : LEX_BEGIN { io_allowed = FALSE; }
+      action
+        {
+            if (begin_block != NULL) {
+                /* second or later BEGIN: turn the block into a list and append */
+                if (begin_block->type != Node_rule_list)
+                    begin_block = node(begin_block, Node_rule_list,
+                        (NODE *) NULL);
+                (void) append_right(begin_block, node(
+                    node((NODE *) NULL, Node_rule_node, $3),
+                    Node_rule_list, (NODE *) NULL) );
+            } else
+                begin_block = node((NODE *) NULL, Node_rule_node, $3);
+            $$ = NULL;
+            io_allowed = TRUE;
+            yyerrok;
+        }
+    | LEX_END { io_allowed = FALSE; }
+      action
+        {
+            if (end_block != NULL) {
+                /* second or later END: turn the block into a list and append */
+                if (end_block->type != Node_rule_list)
+                    end_block = node(end_block, Node_rule_list,
+                        (NODE *) NULL);
+                (void) append_right (end_block, node(
+                    node((NODE *) NULL, Node_rule_node, $3),
+                    Node_rule_list, (NODE *) NULL));
+            } else
+                end_block = node((NODE *) NULL, Node_rule_node, $3);
+            $$ = NULL;
+            io_allowed = TRUE;
+            yyerrok;
+        }
+    | LEX_BEGIN statement_term
+        {
+            warning("BEGIN blocks must have an action part");
+            errcount++;
+            /*
+             * Give the rule an explicit value.  Without this, the
+             * default action copies $1, a <nodetypeval> token value,
+             * into a <nodeval> result.
+             */
+            $$ = NULL;
+            yyerrok;
+        }
+    | LEX_END statement_term
+        {
+            warning("END blocks must have an action part");
+            errcount++;
+            /* as above: never leave $$ holding the LEX_END token value */
+            $$ = NULL;
+            yyerrok;
+        }
+    | pattern action
+        { $$ = node($1, Node_rule_node, $2); yyerrok; }
+    | action
+        { $$ = node((NODE *) NULL, Node_rule_node, $1); yyerrok; }
+    | pattern statement_term
+        {
+            /* a pattern with no action gets an implicit print of field 0 */
+            $$ = node($1,
+                 Node_rule_node,
+                 node(node(node(make_number(0.0),
+                        Node_field_spec,
+                        (NODE *) NULL),
+                    Node_expression_list,
+                    (NODE *) NULL),
+                  Node_K_print,
+                  (NODE *) NULL));
+            yyerrok;
+        }
+    | function_prologue function_body
+        {
+            func_install($1, $2);
+            $$ = NULL;
+            yyerrok;
+        }
+    ;
+
+func_name /* the name following `function' in a definition */
+ : NAME
+ { $$ = $1; }
+ | FUNC_CALL
+ { $$ = $1; }
+ | lex_builtin
+ {
+ yyerror("%s() is a built-in function, it cannot be redefined",
+ tokstart);
+ errcount++; /* counted in addition to the increment yyerror itself performs */
+ /*
+ * NOTE(review): no $$ is assigned in this action, so the <sval>
+ * seen by function_prologue is not a name string here; confirm
+ * that callers tolerate it.
+ */
+ /* yyerrok; */
+ }
+ ;
+
+lex_builtin /* either kind of built-in function token */
+ : LEX_BUILTIN
+ | LEX_LENGTH
+ ;
+
+function_prologue /* `function name(params)'; value is a list: the name node followed by the parameters */
+ : LEX_FUNCTION
+ {
+ param_counter = 0;
+ }
+ func_name '(' opt_param_list r_paren opt_nls
+ {
+ NODE *t;
+
+ t = make_param($3); /* $3 is func_name; the mid-rule action above is $2 */
+ t->flags |= FUNC;
+ $$ = append_right(t, $5);
+ can_return = TRUE; /* `return' is legal until function_body finishes */
+ /* check for duplicate parameter names */
+ if (dup_parms($$))
+ errcount++;
+ }
+ ;
+
+function_body /* braces and statements of a function; clears can_return when done */
+ : l_brace statements r_brace opt_semi
+ {
+ $$ = $2;
+ can_return = FALSE;
+ }
+ | l_brace r_brace opt_semi opt_nls
+ {
+ $$ = node((NODE *) NULL, Node_K_return, (NODE *) NULL); /* empty body becomes a bare return */
+ can_return = FALSE;
+ }
+ ;
+
+
+pattern /* a rule's pattern: a single expression or a `begin, end' range */
+ : exp
+ { $$ = $1; }
+ | exp ',' exp
+ { $$ = mkrangenode(node($1, Node_cond_pair, $3)); }
+ ;
+
+regexp
+ /*
+ * In this rule, want_regexp tells yylex that the next thing
+ * is a regexp so it should read up to the closing slash.
+ * The result is a Node_regex holding both the source text
+ * (re_exp) and the compiled form (re_reg).
+ */
+ : '/'
+ { ++want_regexp; }
+ REGEXP '/'
+ {
+ NODE *n;
+ size_t len;
+
+ getnode(n);
+ n->type = Node_regex;
+ len = strlen($3); /* $3 is the REGEXP text; the mid-rule action is $2 */
+ n->re_exp = make_string($3, len);
+ n->re_reg = make_regexp($3, len, FALSE, TRUE);
+ n->re_text = NULL;
+ n->re_flags = CONST;
+ n->re_cnt = 1;
+ $$ = n;
+ }
+ ;
+
+action /* a brace-enclosed statement list; NULL when the braces are empty */
+ : l_brace statements r_brace opt_semi opt_nls
+ { $$ = $2; }
+ | l_brace r_brace opt_semi opt_nls
+ { $$ = NULL; }
+ ;
+
+/*
+ * statements --- one or more statements.
+ *
+ * A single statement is passed through unchanged (it may be NULL, e.g.
+ * for an empty statement `;' or `{ }').  Further statements are chained
+ * through Node_statement_list cells.  Error alternatives yield NULL.
+ */
+statements
+    : statement
+        {
+            $$ = $1;
+            /*
+             * `statement' yields NULL for empty statements, so the
+             * lint check must not dereference it unconditionally.
+             */
+            if (do_lint && $1 != NULL && isnoeffect($1->type))
+                warning("statement may have no effect");
+        }
+    | statements statement
+        {
+            /* wrap the first statement in a list cell before appending */
+            if ($1 == NULL || $1->type != Node_statement_list)
+                $1 = node($1, Node_statement_list, (NODE *) NULL);
+            $$ = append_right($1,
+                node($2, Node_statement_list, (NODE *) NULL));
+            yyerrok;
+        }
+    | error
+        { $$ = NULL; }
+    | statements error
+        { $$ = NULL; }
+    ;
+
+statement_term /* what ends a simple statement: newline(s), or `;' plus optional newlines */
+ : nls
+ | semi opt_nls
+ ;
+
+statement /* a single statement; the value is its parse tree, or NULL for an empty statement */
+ : semi opt_nls
+ { $$ = NULL; }
+ | l_brace r_brace
+ { $$ = NULL; }
+ | l_brace statements r_brace
+ { $$ = $2; }
+ | if_statement
+ { $$ = $1; }
+ | LEX_WHILE '(' exp r_paren opt_nls statement
+ { $$ = node($3, Node_K_while, $6); }
+ | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
+ { $$ = node($6, Node_K_do, $3); }
+ | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
+ {
+ $$ = node($8, Node_K_arrayfor,
+ make_for_loop(variable($3, CAN_FREE, Node_var),
+ (NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
+ }
+ | LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement
+ {
+ $$ = node($10, Node_K_for, (NODE *) make_for_loop($3, $5, $7));
+ }
+ | LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement
+ {
+ $$ = node($9, Node_K_for,
+ (NODE *) make_for_loop($3, (NODE *) NULL, $6));
+ }
+ | LEX_BREAK statement_term
+ /* for break, maybe we'll have to remember where to break to */
+ { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
+ | LEX_CONTINUE statement_term
+ /* similarly */
+ { $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
+ | print '(' expression_list r_paren output_redir statement_term
+ { $$ = node($3, $1, $5); }
+ | print opt_rexpression_list output_redir statement_term
+ {
+ if ($1 == Node_K_print && $2 == NULL) { /* bare `print': supply field 0 as the argument */
+ static int warned = FALSE;
+
+ $2 = node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL);
+
+ if (do_lint && ! io_allowed && ! warned) {
+ warned = TRUE;
+ warning(
+ "plain `print' in BEGIN or END rule should probably be `print \"\"'");
+ }
+ }
+
+ $$ = node($2, $1, $3);
+ }
+ | LEX_NEXT opt_exp statement_term
+ { NODETYPE type;
+
+ if ($2) {
+ if ($2 == lookup("file")) { /* old two-word spelling `next file' */
+ static int warned = FALSE;
+
+ if (! warned) {
+ warned = TRUE;
+ warning("`next file' is obsolete; use `nextfile'");
+ }
+ if (do_lint)
+ warning("`next file' is a gawk extension");
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error("`next file' is a gawk extension");
+ }
+ if (! io_allowed) {
+ /* same thing */
+ errcount++;
+ error("`next file' used in BEGIN or END action");
+ }
+ type = Node_K_nextfile;
+ } else {
+ errcount++;
+ error("illegal expression after `next'");
+ type = Node_K_next; /* sanity */
+ }
+ } else {
+ if (! io_allowed)
+ yyerror("`next' used in BEGIN or END action");
+ type = Node_K_next;
+ }
+ $$ = node((NODE *) NULL, type, (NODE *) NULL);
+ }
+ | LEX_NEXTFILE statement_term
+ {
+ if (do_lint)
+ warning("`nextfile' is a gawk extension");
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error("`nextfile' is a gawk extension");
+ }
+ if (! io_allowed) {
+ /* same thing */
+ errcount++;
+ error("`nextfile' used in BEGIN or END action");
+ }
+ $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
+ }
+ | LEX_EXIT opt_exp statement_term
+ { $$ = node($2, Node_K_exit, (NODE *) NULL); }
+ | LEX_RETURN
+ {
+ if (! can_return)
+ yyerror("`return' used outside function context");
+ }
+ opt_exp statement_term
+ { $$ = node($3, Node_K_return, (NODE *) NULL); } /* $3 is opt_exp; the mid-rule action is $2 */
+ | LEX_DELETE NAME '[' expression_list ']' statement_term
+ { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); }
+ | LEX_DELETE NAME statement_term
+ {
+ if (do_lint)
+ warning("`delete array' is a gawk extension");
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error("`delete array' is a gawk extension");
+ }
+ $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
+ }
+ | exp statement_term
+ { $$ = $1; }
+ ;
+
+print /* `print' or `printf'; the value is the token's node type, later used as the statement's node type */
+ : LEX_PRINT
+ { $$ = $1; }
+ | LEX_PRINTF
+ { $$ = $1; }
+ ;
+
+if_statement /* Node_K_if: condition on the left, Node_if_branches (then, else-or-NULL) on the right */
+ : LEX_IF '(' exp r_paren opt_nls statement
+ {
+ $$ = node($3, Node_K_if,
+ node($6, Node_if_branches, (NODE *) NULL));
+ }
+ | LEX_IF '(' exp r_paren opt_nls statement
+ LEX_ELSE opt_nls statement
+ { $$ = node($3, Node_K_if,
+ node($6, Node_if_branches, $9)); }
+ ;
+
+nls /* one or more NEWLINE tokens */
+ : NEWLINE
+ { want_assign = FALSE; }
+ | nls NEWLINE
+ ;
+
+opt_nls /* zero or more NEWLINE tokens */
+ : /* empty */
+ | nls
+ ;
+
+input_redir /* optional `< file' after getline; NULL when absent */
+ : /* empty */
+ { $$ = NULL; }
+ | '<' simp_exp
+ { $$ = node($2, Node_redirect_input, (NODE *) NULL); }
+ ;
+
+output_redir /* optional `> file', `>> file' (APPEND_OP) or `| command' after print/printf; NULL when absent */
+ : /* empty */
+ { $$ = NULL; }
+ | '>' exp
+ { $$ = node($2, Node_redirect_output, (NODE *) NULL); }
+ | APPEND_OP exp
+ { $$ = node($2, Node_redirect_append, (NODE *) NULL); }
+ | '|' exp
+ { $$ = node($2, Node_redirect_pipe, (NODE *) NULL); }
+ ;
+
+opt_param_list /* formal parameters of a function definition; NULL when there are none */
+ : /* empty */
+ { $$ = NULL; }
+ | param_list
+ { $$ = $1; }
+ ;
+
+param_list /* comma-separated parameter names, chained with append_right(); NULL after a syntax error */
+ : NAME
+ { $$ = make_param($1); }
+ | param_list comma NAME
+ { $$ = append_right($1, make_param($3)); yyerrok; }
+ | error
+ { $$ = NULL; }
+ | param_list error
+ { $$ = NULL; }
+ | param_list comma error
+ { $$ = NULL; }
+ ;
+
+/* optional expression, as in for loop; NULL when omitted */
+opt_exp
+ : /* empty */
+ { $$ = NULL; }
+ | exp
+ { $$ = $1; }
+ ;
+
+opt_rexpression_list /* optional list of rexp items (used by unparenthesized print); NULL when omitted */
+ : /* empty */
+ { $$ = NULL; }
+ | rexpression_list
+ { $$ = $1; }
+ ;
+
+rexpression_list /* comma-separated rexp items as a chain of Node_expression_list cells; NULL after a syntax error */
+ : rexp
+ { $$ = node($1, Node_expression_list, (NODE *) NULL); }
+ | rexpression_list comma rexp
+ {
+ $$ = append_right($1,
+ node($3, Node_expression_list, (NODE *) NULL));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | rexpression_list error
+ { $$ = NULL; }
+ | rexpression_list error rexp
+ { $$ = NULL; }
+ | rexpression_list comma error
+ { $$ = NULL; }
+ ;
+
+opt_expression_list /* optional argument list; NULL when omitted */
+ : /* empty */
+ { $$ = NULL; }
+ | expression_list
+ { $$ = $1; }
+ ;
+
+expression_list /* comma-separated exp items as a chain of Node_expression_list cells; NULL after a syntax error */
+ : exp
+ { $$ = node($1, Node_expression_list, (NODE *) NULL); }
+ | expression_list comma exp
+ {
+ $$ = append_right($1,
+ node($3, Node_expression_list, (NODE *) NULL));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | expression_list error
+ { $$ = NULL; }
+ | expression_list error exp
+ { $$ = NULL; }
+ | expression_list comma error
+ { $$ = NULL; }
+ ;
+
+/*
+ * Expressions, not including the comma operator.
+ *
+ * Each alternative builds a two-child node: left operand, node type,
+ * right operand.  For ASSIGNOP, MATCHOP and RELOP the node type is the
+ * token's own value ($2).  The lint warnings about regexp constants name
+ * the side of the operator on which the constant actually appears.
+ */
+exp : variable ASSIGNOP
+        { want_assign = FALSE; }
+      exp
+        {
+            /* $4 is the value being assigned, i.e. the right-hand side */
+            if (do_lint && $4->type == Node_regex)
+                warning("Regular expression on right of assignment.");
+            $$ = node($1, $2, $4);
+        }
+    | '(' expression_list r_paren LEX_IN NAME
+        { $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); }
+    | exp '|' LEX_GETLINE opt_variable
+        {
+            $$ = node($4, Node_K_getline,
+                 node($1, Node_redirect_pipein, (NODE *) NULL));
+        }
+    | LEX_GETLINE opt_variable input_redir
+        {
+            if (do_lint && ! io_allowed && $3 == NULL)
+                warning("non-redirected getline undefined inside BEGIN or END action");
+            $$ = node($2, Node_K_getline, $3);
+        }
+    | exp LEX_AND exp
+        { $$ = node($1, Node_and, $3); }
+    | exp LEX_OR exp
+        { $$ = node($1, Node_or, $3); }
+    | exp MATCHOP exp
+        {
+            if ($1->type == Node_regex)
+                warning("Regular expression on left of MATCH operator.");
+            /* the right operand is turned into a regexp node by mk_rexp() */
+            $$ = node($1, $2, mk_rexp($3));
+        }
+    | regexp
+        {
+            $$ = $1;
+            /* tokstart still holds the text of the regexp just scanned */
+            if (do_lint && tokstart[0] == '*') {
+                /* possible C comment */
+                int n = strlen(tokstart) - 1;
+                if (tokstart[n] == '*')
+                    warning("regexp looks like a C comment, but is not");
+            }
+        }
+    | '!' regexp %prec UNARY
+        {
+            /* `! /re/' is built as a no-match test against field 0 */
+            $$ = node(node(make_number(0.0),
+                       Node_field_spec,
+                       (NODE *) NULL),
+                  Node_nomatch,
+                  $2);
+        }
+    | exp LEX_IN NAME
+        { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
+    | exp RELOP exp
+        {
+            /* $3 is the right-hand operand of the comparison */
+            if (do_lint && $3->type == Node_regex)
+                warning("Regular expression on right of comparison.");
+            $$ = node($1, $2, $3);
+        }
+    | exp '<' exp
+        { $$ = node($1, Node_less, $3); }
+    | exp '>' exp
+        { $$ = node($1, Node_greater, $3); }
+    | exp '?' exp ':' exp
+        { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
+    | simp_exp
+        { $$ = $1; }
+    | exp simp_exp %prec CONCAT_OP
+        { $$ = node($1, Node_concat, $2); }
+    ;
+
+rexp /* restricted expression: like exp but with no '<', '>' or '|' alternatives; used for unparenthesized print arguments */
+ : variable ASSIGNOP
+ { want_assign = FALSE; }
+ rexp
+ { $$ = node($1, $2, $4); }
+ | rexp LEX_AND rexp
+ { $$ = node($1, Node_and, $3); }
+ | rexp LEX_OR rexp
+ { $$ = node($1, Node_or, $3); }
+ | LEX_GETLINE opt_variable input_redir
+ {
+ if (do_lint && ! io_allowed && $3 == NULL)
+ warning("non-redirected getline undefined inside BEGIN or END action");
+ $$ = node($2, Node_K_getline, $3);
+ }
+ | regexp
+ { $$ = $1; }
+ | '!' regexp %prec UNARY
+ { $$ = node((NODE *) NULL, Node_nomatch, $2); } /* unlike exp, the left operand here is NULL rather than a field 0 node */
+ | rexp MATCHOP rexp
+ { $$ = node($1, $2, mk_rexp($3)); }
+ | rexp LEX_IN NAME
+ { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
+ | rexp RELOP rexp
+ { $$ = node($1, $2, $3); }
+ | rexp '?' rexp ':' rexp
+ { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
+ | simp_exp
+ { $$ = $1; }
+ | rexp simp_exp %prec CONCAT_OP
+ { $$ = node($1, Node_concat, $2); }
+ ;
+
+simp_exp /* arithmetic expressions and post-increment/decrement */
+ : non_post_simp_exp
+ /* Binary operators in order of decreasing precedence. */
+ | simp_exp '^' simp_exp
+ { $$ = node($1, Node_exp, $3); }
+ | simp_exp '*' simp_exp
+ { $$ = node($1, Node_times, $3); }
+ | simp_exp '/' simp_exp
+ { $$ = node($1, Node_quotient, $3); }
+ | simp_exp '%' simp_exp
+ { $$ = node($1, Node_mod, $3); }
+ | simp_exp '+' simp_exp
+ { $$ = node($1, Node_plus, $3); }
+ | simp_exp '-' simp_exp
+ { $$ = node($1, Node_minus, $3); }
+ | variable INCREMENT
+ { $$ = node($1, Node_postincrement, (NODE *) NULL); }
+ | variable DECREMENT
+ { $$ = node($1, Node_postdecrement, (NODE *) NULL); }
+ ;
+
+non_post_simp_exp /* primaries and prefix operators; also the only operand form accepted after '$' */
+ : '!' simp_exp %prec UNARY
+ { $$ = node($2, Node_not, (NODE *) NULL); }
+ | '(' exp r_paren
+ { $$ = $2; }
+ | LEX_BUILTIN
+ '(' opt_expression_list r_paren
+ { $$ = snode($3, Node_builtin, (int) $1); }
+ | LEX_LENGTH '(' opt_expression_list r_paren
+ { $$ = snode($3, Node_builtin, (int) $1); }
+ | LEX_LENGTH
+ {
+ if (do_lint)
+ warning("call of `length' without parentheses is not portable");
+ $$ = snode((NODE *) NULL, Node_builtin, (int) $1);
+ if (do_posix)
+ warning("call of `length' without parentheses is deprecated by POSIX");
+ }
+ | FUNC_CALL '(' opt_expression_list r_paren
+ {
+ $$ = node($3, Node_func_call, make_string($1, strlen($1)));
+ func_use($1, FUNC_USE);
+ param_sanity($3);
+ free($1); /* the name text is released here, after make_string() and func_use() have used it */
+ }
+ | variable
+ | INCREMENT variable
+ { $$ = node($2, Node_preincrement, (NODE *) NULL); }
+ | DECREMENT variable
+ { $$ = node($2, Node_predecrement, (NODE *) NULL); }
+ | YNUMBER
+ { $$ = $1; }
+ | YSTRING
+ { $$ = $1; }
+
+ | '-' simp_exp %prec UNARY
+ {
+ if ($2->type == Node_val) { /* constant operand: negate it in place at parse time */
+ $2->numbr = -(force_number($2));
+ $$ = $2;
+ } else
+ $$ = node($2, Node_unary_minus, (NODE *) NULL);
+ }
+ | '+' simp_exp %prec UNARY
+ {
+ /*
+ * was: $$ = $2
+ * POSIX semantics: force a conversion to numeric type
+ * (done here by building the addition 0 + operand)
+ */
+ $$ = node (make_number(0.0), Node_plus, $2);
+ }
+ ;
+
+opt_variable /* optional target variable, as after getline; NULL when omitted */
+ : /* empty */
+ { $$ = NULL; }
+ | variable
+ { $$ = $1; }
+ ;
+
+/*
+ * variable --- something that can name a storage location: a plain
+ * variable, an array element, or a field reference.
+ *
+ * For a subscript, a single-element expression list is unwrapped so
+ * the subscript node points directly at the expression; a list of
+ * several expressions is kept as the list.
+ */
+variable
+    : NAME
+        { $$ = variable($1, CAN_FREE, Node_var); }
+    | NAME '[' expression_list ']'
+        {
+            if ($3 == NULL) {
+                /*
+                 * expression_list is NULL after one of its error
+                 * alternatives; stop here instead of dereferencing
+                 * a NULL list below.
+                 */
+                fatal("invalid subscript expression");
+            } else if ($3->rnode == NULL) {
+                $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
+                freenode($3);    /* the list cell itself is no longer needed */
+            } else
+                $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
+        }
+    | '$' non_post_simp_exp
+        { $$ = node($2, Node_field_spec, (NODE *) NULL); }
+    ;
+
+l_brace /* '{' followed by optional newlines */
+ : '{' opt_nls
+ ;
+
+r_brace /* '}' followed by optional newlines; ends error recovery */
+ : '}' opt_nls { yyerrok; }
+ ;
+
+r_paren /* ')'; ends error recovery */
+ : ')' { yyerrok; }
+ ;
+
+opt_semi /* optional ';' */
+ : /* empty */
+ | semi
+ ;
+
+semi /* ';'; ends error recovery and clears want_assign */
+ : ';' { yyerrok; want_assign = FALSE; }
+ ;
+
+comma : ',' opt_nls { yyerrok; } /* a newline may follow a comma */
+ ;
+
+%%
+
+struct token {
+ const char *operator; /* text to match */
+ NODETYPE value; /* node type */
+ int class; /* lexical class */
+ unsigned flags; /* # of args. allowed and compatibility */
+# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination) */
+# define A(n) (1<<(n)) /* flag bit for "n arguments allowed" */
+# define VERSION 0xFF00 /* old awk is zero */
+# define NOT_OLD 0x0100 /* feature not in old awk */
+# define NOT_POSIX 0x0200 /* feature not in POSIX */
+# define GAWKX 0x0400 /* gawk extension */
+# define RESX 0x0800 /* Bell Labs Research extension */
+ NODE *(*ptr)(); /* function that implements this keyword */
+};
+
+extern NODE
+ *do_exp(), *do_getline(), *do_index(), *do_length(),
+ *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(),
+ *do_split(), *do_system(), *do_int(), *do_close(),
+ *do_atan2(), *do_sin(), *do_cos(), *do_rand(),
+ *do_srand(), *do_match(), *do_tolower(), *do_toupper(),
+ *do_sub(), *do_gsub(), *do_strftime(), *do_systime(),
+ *do_fflush(); /* implementations of the built-ins referenced from tokentab below */
+
+/*
+ * Tokentab is sorted ascii ascending order, so it can be binary searched.
+ * New entries must be inserted in their sorted position.
+ *
+ * Each entry gives, in struct token order: the keyword text, its node
+ * type, its lexical class, the flags (A(n) bits for the allowed
+ * argument counts plus version/extension bits) and the implementing
+ * function (0 when there is none).
+ */
+
+static struct token tokentab[] = {
+{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
+{"END", Node_illegal, LEX_END, 0, 0},
+#ifdef BITOPS
+{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
+#endif /* BITOPS */
+{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
+{"break", Node_K_break, LEX_BREAK, 0, 0},
+{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close},
+#ifdef BITOPS
+{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
+#endif /* BITOPS */
+{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
+{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
+{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
+{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
+{"else", Node_illegal, LEX_ELSE, 0, 0},
+{"exit", Node_K_exit, LEX_EXIT, 0, 0},
+{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
+{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
+{"for", Node_K_for, LEX_FOR, 0, 0},
+{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
+{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
+{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
+{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
+{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"if", Node_K_if, LEX_IF, 0, 0},
+{"in", Node_illegal, LEX_IN, 0, 0},
+{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
+{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
+{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
+{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
+#ifdef BITOPS
+{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
+#endif /* BITOPS */
+{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match},
+{"next", Node_K_next, LEX_NEXT, 0, 0},
+{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
+#ifdef BITOPS
+{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
+#endif /* BITOPS */
+{"print", Node_K_print, LEX_PRINT, 0, 0},
+{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
+{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
+{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
+#ifdef BITOPS
+{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
+#endif /* BITOPS */
+{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
+{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
+{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
+{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
+{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime},
+#ifdef BITOPS
+{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
+#endif /* BITOPS */
+{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
+{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
+{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
+{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
+{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
+{"while", Node_K_while, LEX_WHILE, 0, 0},
+#ifdef BITOPS
+{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
+#endif /* BITOPS */
+};
+
+/*
+ * yyerror --- print a syntax error message, show where
+ *
+ * Prints the offending source line with msg(), then hands err() a
+ * format made of padding up to the start of the current lexeme, a
+ * "^ " marker, and the caller's printf-style message.  The arguments
+ * following the message are passed on to err() as a va_list.  When
+ * the lexeme is a newline the caller's message is replaced by
+ * "unexpected newline".  errcount is incremented on every call.
+ *
+ * The marker line is built in a buffer sized from the actual message,
+ * so a long message can no longer run past a fixed-size array.
+ */
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+static void
+yyerror(const char *m, ...)
+#else
+/* VARARGS0 */
+static void
+yyerror(va_alist)
+va_dcl
+#endif
+{
+    va_list args;
+    const char *mesg = NULL;
+    register char *bp, *cp;
+    char *scan;
+    char *buf;
+    static char end_of_file_line[] = "(END OF FILE)";
+
+#   define YYERROR_MAXPAD 96    /* most padding ever placed before the '^' */
+
+    errcount++;
+    /* Find the current line in the input file */
+    if (lexptr && lexeme) {
+        if (thisline == NULL) {
+            cp = lexeme;
+            if (*cp == '\n') {
+                cp--;
+                mesg = "unexpected newline";
+            }
+            /* back up to the start of the line holding the lexeme */
+            for (; cp != lexptr_begin && *cp != '\n'; --cp)
+                continue;
+            if (*cp == '\n')
+                cp++;
+            thisline = cp;
+        }
+        /* NL isn't guaranteed */
+        bp = lexeme;
+        while (bp < lexend && *bp && *bp != '\n')
+            bp++;
+    } else {
+        thisline = end_of_file_line;
+        bp = thisline + strlen(thisline);
+    }
+    msg("%.*s", (int) (bp - thisline), thisline);
+
+    /* the message must be known before the buffer can be sized */
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+    va_start(args, m);
+    if (mesg == NULL)
+        mesg = m;
+#else
+    va_start(args);
+    if (mesg == NULL)
+        mesg = va_arg(args, char *);
+#endif
+
+    /* room for the padding, the "^ " marker, the message and the NUL */
+    emalloc(buf, char *, YYERROR_MAXPAD + 2 + strlen(mesg) + 1, "yyerror");
+    bp = buf;
+    cp = buf + YYERROR_MAXPAD;
+    if (lexptr != NULL) {
+        /* copy tabs through so the marker lines up under the lexeme */
+        scan = thisline;
+        while (bp < cp && scan < lexeme)
+            if (*scan++ == '\t')
+                *bp++ = '\t';
+            else
+                *bp++ = ' ';
+        *bp++ = '^';
+        *bp++ = ' ';
+    }
+    strcpy(bp, mesg);
+    err("", buf, args);
+    va_end(args);
+    free(buf);
+
+#   undef YYERROR_MAXPAD
+}
+
+/*
+ * get_src_buf --- read the next buffer of source program
+ *
+ * Walks srcfiles[] using the static index nextfile.  Program text from
+ * a CMDLINE entry is scanned in place; any other entry is opened with
+ * pathopen() and read one buffer at a time.  On success lexptr and
+ * lexend delimit the new text and a non-NULL pointer is returned;
+ * NULL is returned when no source is left.
+ */
+
+static char *
+get_src_buf()
+{
+ static int samefile = FALSE;
+ static int nextfile = 0;
+ static char *buf = NULL;
+ static int fd;
+ int n;
+ register char *scan;
+ static int len = 0;
+ static int did_newline = FALSE;
+ int newfile;
+ struct stat sbuf;
+
+# define SLOP 128 /* enough space to hold most source lines */
+
+again:
+ newfile = FALSE;
+ if (nextfile > numfiles) /* every entry of srcfiles[] has been used */
+ return NULL;
+
+ if (srcfiles[nextfile].stype == CMDLINE) {
+ if (len == 0) { /* first call for this entry */
+ len = strlen(srcfiles[nextfile].val);
+ if (len == 0) {
+ /*
+ * Yet Another Special case:
+ * gawk '' /path/name
+ * Sigh.
+ */
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ warning("empty program text on command line");
+ }
+ ++nextfile;
+ goto again;
+ }
+ sourceline = 1;
+ lexptr = lexptr_begin = srcfiles[nextfile].val;
+ lexend = lexptr + len;
+ } else if (! did_newline && *(lexptr-1) != '\n') {
+ /*
+ * The following goop is to ensure that the source
+ * ends with a newline and that the entire current
+ * line is available for error messages.
+ * The last line is copied into a freshly allocated
+ * buffer with a newline appended, and the lexer
+ * pointers are moved over to that copy.
+ */
+ int offset;
+
+ did_newline = TRUE;
+ offset = lexptr - lexeme;
+ for (scan = lexeme; scan > lexptr_begin; scan--)
+ if (*scan == '\n') {
+ scan++;
+ break;
+ }
+ len = lexptr - scan;
+ emalloc(buf, char *, len+1, "get_src_buf");
+ memcpy(buf, scan, len);
+ thisline = buf;
+ lexptr = buf + len;
+ *lexptr = '\n';
+ lexeme = lexptr - offset;
+ lexptr_begin = buf;
+ lexend = lexptr + 1;
+ } else { /* this command-line text is exhausted */
+ len = 0;
+ lexeme = lexptr = lexptr_begin = NULL;
+ }
+ if (lexptr == NULL && ++nextfile <= numfiles)
+ goto again;
+ return lexptr;
+ }
+ if (! samefile) { /* starting a new source file */
+ source = srcfiles[nextfile].val;
+ if (source == NULL) {
+ if (buf != NULL) {
+ free(buf);
+ buf = NULL;
+ }
+ len = 0;
+ return lexeme = lexptr = lexptr_begin = NULL;
+ }
+ fd = pathopen(source);
+ if (fd <= INVALID_HANDLE) {
+ char *in;
+
+ /* suppress file name and line no. in error mesg */
+ in = source;
+ source = NULL;
+ fatal("can't open source file \"%s\" for reading (%s)",
+ in, strerror(errno));
+ }
+ len = optimal_bufsize(fd, & sbuf);
+ newfile = TRUE;
+ if (buf != NULL)
+ free(buf);
+ emalloc(buf, char *, len + SLOP, "get_src_buf"); /* SLOP extra bytes in front of the read area */
+ lexptr_begin = buf + SLOP;
+ samefile = TRUE;
+ sourceline = 1;
+ } else {
+ /*
+ * Here, we retain the current source line (up to length SLOP)
+ * in the beginning of the buffer that was overallocated above
+ */
+ int offset;
+ int linelen;
+
+ offset = lexptr - lexeme;
+ for (scan = lexeme; scan > lexptr_begin; scan--)
+ if (*scan == '\n') {
+ scan++;
+ break;
+ }
+ linelen = lexptr - scan;
+ if (linelen > SLOP)
+ linelen = SLOP;
+ thisline = buf + SLOP - linelen;
+ memcpy(thisline, scan, linelen);
+ lexeme = buf + SLOP - offset; /* NOTE(review): offset is not limited to SLOP the way linelen is; confirm */
+ lexptr_begin = thisline;
+ }
+ n = read(fd, buf + SLOP, len);
+ if (n == -1)
+ fatal("can't read sourcefile \"%s\" (%s)",
+ source, strerror(errno));
+ if (n == 0) { /* end of this file: close it and move on to the next source */
+ if (newfile) {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ warning("source file `%s' is empty", source);
+ }
+ }
+ close(fd);
+ samefile = FALSE;
+ nextfile++;
+ if (lexeme)
+ *lexeme = '\0';
+ len = 0;
+ goto again;
+ }
+ lexptr = buf + SLOP;
+ lexend = lexptr + n;
+ return buf;
+}
+
+/*
+ * tokadd --- add a character to the token buffer
+ *
+ * Stores x at tok and advances tok; when tok reaches tokend the buffer
+ * is grown with tokexpand().  The expression's value is the updated tok.
+ */
+
+#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
+
+/*
+ * tokexpand --- grow the token buffer
+ *
+ * Doubles the recorded buffer size, then allocates the buffer (first
+ * call) or reallocates it.  tok keeps its offset from the start of the
+ * buffer and tokend is moved to the new end.  Returns the updated tok.
+ */
+
+char *
+tokexpand()
+{
+    static int toksize = 60;
+    int used = tok - tokstart;    /* characters already stored */
+
+    toksize += toksize;
+    if (tokstart == NULL)
+        emalloc(tokstart, char *, toksize, "tokexpand");
+    else
+        erealloc(tokstart, char *, toksize, "tokexpand");
+    tok = tokstart + used;
+    tokend = tokstart + toksize;
+    return tok;
+}
+
+/*
+ * nextc --- get the next input character
+ *
+ * Returns the next source character, refilling through get_src_buf()
+ * when the current buffer is used up, or EOF when no source is left.
+ *
+ * Characters are returned as unsigned char values converted to int, so
+ * that a byte with the high bit set can never compare equal to EOF or
+ * reach the <ctype.h> functions as a negative number.
+ */
+
+#if DEBUG
+int
+nextc()
+{
+    int c;
+
+    if (lexptr && lexptr < lexend)
+        c = (int) (unsigned char) *lexptr++;
+    else if (get_src_buf())
+        c = (int) (unsigned char) *lexptr++;
+    else
+        c = EOF;
+
+    return c;
+}
+#else
+#define nextc() ((lexptr && lexptr < lexend) ? \
+        ((int) (unsigned char) *lexptr++) : \
+        (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \
+    )
+#endif
+
+/*
+ * pushback --- push a character back on the input
+ *
+ * Steps lexptr back by one unless it is NULL or already at
+ * lexptr_begin, in which case lexptr is left unchanged.
+ */
+
+#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
+
+/*
+ * allow_newline --- allow newline after &&, ||, ? and :
+ *
+ * Skips white space, newlines and `#' comments, counting each newline
+ * in sourceline.  Stops at end of input, or at the first character
+ * that is not white space; that character is pushed back.
+ */
+
+static void
+allow_newline()
+{
+    int c;
+
+    for (;;) {
+        c = nextc();
+        if (c == EOF)
+            break;
+        if (c == '#') {
+            /* discard the comment up to, but not including, its newline */
+            while ((c = nextc()) != '\n' && c != EOF)
+                continue;
+            if (c == EOF)
+                break;
+        }
+        if (c == '\n')
+            sourceline++;
+        /*
+         * EOF was handled above, so c is a character here; go
+         * through unsigned char so that isspace() never sees a
+         * negative value.
+         */
+        if (! isspace((unsigned char) c)) {
+            pushback();
+            break;
+        }
+    }
+}
+
+/*
+ * yylex --- Read the input and turn it into tokens.
+ *
+ * Returns the token code for the next token (0 at end of input) and
+ * leaves any associated value in yylval. The token returned is normally
+ * remembered in the static `lasttok'.
+ *
+ * If the input ends and the previous token was not a NEWLINE, one
+ * NEWLINE is returned first. When want_regexp is set on entry, the
+ * text up to the closing `/' is collected and returned as a REGEXP.
+ */
+
+static int
+yylex()
+{
+ register int c, c1;
+ int seen_e = FALSE; /* These are for numbers */
+ int seen_point = FALSE;
+ int esc_seen; /* for literal strings */
+ int low, mid, high;
+ static int did_newline = FALSE;
+ char *tokkey;
+ static int lasttok = 0, eof_warned = FALSE;
+ int inhex = FALSE;
+
+ /* peek at the next character to detect end of input */
+ if (nextc() == EOF) {
+ if (lasttok != NEWLINE) {
+ lasttok = NEWLINE;
+ if (do_lint && ! eof_warned) {
+ warning("source file does not end in newline");
+ eof_warned = TRUE;
+ }
+ return NEWLINE; /* fake it */
+ }
+ return 0;
+ }
+ pushback();
+#ifdef OS2
+ /*
+ * added for OS/2's extproc feature of cmd.exe
+ * (like #! in BSD sh)
+ */
+ if (strncasecmp(lexptr, "extproc ", 8) == 0) {
+ while (*lexptr && *lexptr != '\n')
+ lexptr++;
+ }
+#endif
+ lexeme = lexptr;
+ thisline = NULL;
+ if (want_regexp) {
+ int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
+ /*
+ * Counting brackets is non-trivial. [[] is ok,
+ * and so is [\]], with a point being that /[/]/ as a regexp
+ * constant has to work.
+ *
+ * Do not count [ or ] if either one is preceded by a \.
+ * A `[' should be counted if
+ * a) it is the first one so far (in_brack == 0)
+ * b) it is the `[' in `[:'
+ * A ']' should be counted if not preceded by a \, since
+ * it is either closing `:]' or just a plain list.
+ * According to POSIX, []] is how you put a ] into a set.
+ * Try to handle that too.
+ *
+ * The code for \ handles \[ and \].
+ */
+
+ want_regexp = FALSE;
+ tok = tokstart;
+ for (;;) {
+ c = nextc();
+ switch (c) {
+ case '[':
+ /* one day check for `.' and `=' too */
+ if ((c1 = nextc()) == ':' || in_brack == 0)
+ in_brack++;
+ pushback();
+ break;
+ case ']':
+ if (tokstart[0] == '['
+ && (tok == tokstart + 1
+ || (tok == tokstart + 2
+ && tokstart[1] == '^')))
+ /* do nothing */;
+ else
+ in_brack--;
+ break;
+ case '\\':
+ if ((c = nextc()) == EOF) {
+ yyerror("unterminated regexp ends with \\ at end of file");
+ return lasttok = REGEXP; /* kludge */
+ } else if (c == '\n') {
+ sourceline++;
+ continue;
+ } else {
+ tokadd('\\');
+ tokadd(c);
+ continue;
+ }
+ break;
+ case '/': /* end of the regexp */
+ if (in_brack > 0)
+ break;
+
+ pushback();
+ tokadd('\0');
+ yylval.sval = tokstart;
+ return lasttok = REGEXP;
+ case '\n':
+ pushback();
+ yyerror("unterminated regexp");
+ return lasttok = REGEXP; /* kludge */
+ case EOF:
+ yyerror("unterminated regexp at end of file");
+ return lasttok = REGEXP; /* kludge */
+ }
+ tokadd(c);
+ }
+ }
+retry:
+ /* skip blanks and tabs before the token */
+ while ((c = nextc()) == ' ' || c == '\t')
+ continue;
+
+ lexeme = lexptr ? lexptr - 1 : lexptr;
+ thisline = NULL;
+ tok = tokstart;
+ yylval.nodetypeval = Node_illegal;
+
+ switch (c) {
+ case EOF:
+ if (lasttok != NEWLINE) {
+ lasttok = NEWLINE;
+ if (do_lint && ! eof_warned) {
+ warning("source file does not end in newline");
+ eof_warned = TRUE;
+ }
+ return NEWLINE; /* fake it */
+ }
+ return 0;
+
+ case '\n':
+ sourceline++;
+ return lasttok = NEWLINE;
+
+ case '#': /* it's a comment */
+ while ((c = nextc()) != '\n') {
+ if (c == EOF) {
+ if (lasttok != NEWLINE) {
+ lasttok = NEWLINE;
+ if (do_lint && ! eof_warned) {
+ warning(
+ "source file does not end in newline");
+ eof_warned = TRUE;
+ }
+ return NEWLINE; /* fake it */
+ }
+ return 0;
+ }
+ }
+ sourceline++;
+ return lasttok = NEWLINE;
+
+ case '\\':
+#ifdef RELAXED_CONTINUATION
+ /*
+ * This code purports to allow comments and/or whitespace
+ * after the `\' at the end of a line used for continuation.
+ * Use it at your own risk. We think it's a bad idea, which
+ * is why it's not on by default.
+ */
+ if (! do_traditional) {
+ /* strip trailing white-space and/or comment */
+ while ((c = nextc()) == ' ' || c == '\t')
+ continue;
+ if (c == '#') {
+ if (do_lint)
+ warning(
+ "use of `\\ #...' line continuation is not portable");
+ while ((c = nextc()) != '\n')
+ if (c == EOF)
+ break;
+ }
+ pushback();
+ }
+#endif /* RELAXED_CONTINUATION */
+ if (nextc() == '\n') {
+ sourceline++;
+ goto retry;
+ } else {
+ yyerror("backslash not last character on line");
+ exit(1);
+ }
+ break;
+
+ case '$':
+ want_assign = TRUE;
+ return lasttok = '$';
+
+ case ':':
+ case '?':
+ allow_newline();
+ /* fall through */
+ case ')':
+ case ']':
+ case '(':
+ case '[':
+ case ';':
+ case '{':
+ case ',':
+ return lasttok = c;
+
+ case '*':
+ if ((c = nextc()) == '=') {
+ yylval.nodetypeval = Node_assign_times;
+ return lasttok = ASSIGNOP;
+ } else if (do_posix) {
+ pushback();
+ return lasttok = '*';
+ } else if (c == '*') {
+ /* make ** and **= aliases for ^ and ^= */
+ static int did_warn_op = FALSE, did_warn_assgn = FALSE;
+
+ if (nextc() == '=') {
+ if (do_lint && ! did_warn_assgn) {
+ did_warn_assgn = TRUE;
+ warning("**= is not allowed by POSIX");
+ warning("operator `**=' is not supported in old awk");
+ }
+ yylval.nodetypeval = Node_assign_exp;
+ /* NOTE(review): unlike the other returns, lasttok is not updated here */
+ return ASSIGNOP;
+ } else {
+ pushback();
+ if (do_lint && ! did_warn_op) {
+ did_warn_op = TRUE;
+ warning("** is not allowed by POSIX");
+ warning("operator `**' is not supported in old awk");
+ }
+ return lasttok = '^';
+ }
+ }
+ pushback();
+ return lasttok = '*';
+
+ case '/':
+ /* `/=' is only recognized while want_assign is set */
+ if (want_assign) {
+ if (nextc() == '=') {
+ yylval.nodetypeval = Node_assign_quotient;
+ return lasttok = ASSIGNOP;
+ }
+ pushback();
+ }
+ return lasttok = '/';
+
+ case '%':
+ if (nextc() == '=') {
+ yylval.nodetypeval = Node_assign_mod;
+ return lasttok = ASSIGNOP;
+ }
+ pushback();
+ return lasttok = '%';
+
+ case '^':
+ {
+ static int did_warn_op = FALSE, did_warn_assgn = FALSE;
+
+ if (nextc() == '=') {
+ if (do_lint && ! did_warn_assgn) {
+ did_warn_assgn = TRUE;
+ warning("operator `^=' is not supported in old awk");
+ }
+ yylval.nodetypeval = Node_assign_exp;
+ return lasttok = ASSIGNOP;
+ }
+ pushback();
+ if (do_lint && ! did_warn_op) {
+ did_warn_op = TRUE;
+ warning("operator `^' is not supported in old awk");
+ }
+ return lasttok = '^';
+ }
+
+ case '+':
+ if ((c = nextc()) == '=') {
+ yylval.nodetypeval = Node_assign_plus;
+ return lasttok = ASSIGNOP;
+ }
+ if (c == '+')
+ return lasttok = INCREMENT;
+ pushback();
+ return lasttok = '+';
+
+ case '!':
+ if ((c = nextc()) == '=') {
+ yylval.nodetypeval = Node_notequal;
+ return lasttok = RELOP;
+ }
+ if (c == '~') {
+ yylval.nodetypeval = Node_nomatch;
+ want_assign = FALSE;
+ return lasttok = MATCHOP;
+ }
+ pushback();
+ return lasttok = '!';
+
+ case '<':
+ if (nextc() == '=') {
+ yylval.nodetypeval = Node_leq;
+ return lasttok = RELOP;
+ }
+ yylval.nodetypeval = Node_less;
+ pushback();
+ return lasttok = '<';
+
+ case '=':
+ if (nextc() == '=') {
+ yylval.nodetypeval = Node_equal;
+ return lasttok = RELOP;
+ }
+ yylval.nodetypeval = Node_assign;
+ pushback();
+ return lasttok = ASSIGNOP;
+
+ case '>':
+ if ((c = nextc()) == '=') {
+ yylval.nodetypeval = Node_geq;
+ return lasttok = RELOP;
+ } else if (c == '>') {
+ yylval.nodetypeval = Node_redirect_append;
+ return lasttok = APPEND_OP;
+ }
+ yylval.nodetypeval = Node_greater;
+ pushback();
+ return lasttok = '>';
+
+ case '~':
+ yylval.nodetypeval = Node_match;
+ want_assign = FALSE;
+ return lasttok = MATCHOP;
+
+ case '}':
+ /*
+ * Added did newline stuff. Easier than
+ * hacking the grammar.
+ */
+ /* first visit returns NEWLINE and re-reads the `}'; second returns `}' */
+ if (did_newline) {
+ did_newline = FALSE;
+ return lasttok = c;
+ }
+ did_newline++;
+ --lexptr; /* pick up } next time */
+ return lasttok = NEWLINE;
+
+ case '"':
+ /* string literal: escapes are kept in the text and SCAN is requested */
+ esc_seen = FALSE;
+ while ((c = nextc()) != '"') {
+ if (c == '\n') {
+ pushback();
+ yyerror("unterminated string");
+ exit(1);
+ }
+ if (c == '\\') {
+ c = nextc();
+ if (c == '\n') {
+ sourceline++;
+ continue;
+ }
+ esc_seen = TRUE;
+ tokadd('\\');
+ }
+ if (c == EOF) {
+ pushback();
+ yyerror("unterminated string");
+ exit(1);
+ }
+ tokadd(c);
+ }
+ yylval.nodeval = make_str_node(tokstart,
+ tok - tokstart, esc_seen ? SCAN : 0);
+ yylval.nodeval->flags |= PERM;
+ return lasttok = YSTRING;
+
+ case '-':
+ if ((c = nextc()) == '=') {
+ yylval.nodetypeval = Node_assign_minus;
+ return lasttok = ASSIGNOP;
+ }
+ if (c == '-')
+ return lasttok = DECREMENT;
+ pushback();
+ return lasttok = '-';
+
+ case '.':
+ /* a `.' starts a number only when a digit follows it */
+ c = nextc();
+ pushback();
+ if (! isdigit(c))
+ return lasttok = '.';
+ else
+ c = '.';
+ /* FALL THROUGH */
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ /* It's a number */
+ /*
+ * Every character examined is added to the token first; the
+ * one that ends the number is overwritten below by the '\0'
+ * only in the sense that it is pushed back for the next call.
+ * NOTE(review): the terminating character stays in the token
+ * buffer before the '\0' --- confirm atof() tolerating it is
+ * what is relied upon.
+ */
+ for (;;) {
+ int gotnumber = FALSE;
+
+ tokadd(c);
+ switch (c) {
+#ifdef BITOPS
+ case 'x':
+ case 'X':
+ if (do_traditional)
+ goto done;
+ if (tok == tokstart + 2)
+ inhex = TRUE;
+ break;
+#endif /* BITOPS */
+ case '.':
+ if (seen_point) {
+ gotnumber = TRUE;
+ break;
+ }
+ seen_point = TRUE;
+ break;
+ case 'e':
+ case 'E':
+ if (inhex)
+ break;
+ if (seen_e) {
+ gotnumber = TRUE;
+ break;
+ }
+ seen_e = TRUE;
+ if ((c = nextc()) == '-' || c == '+')
+ tokadd(c);
+ else
+ pushback();
+ break;
+#ifdef BITOPS
+ case 'a':
+ case 'A':
+ case 'b':
+ case 'B':
+ case 'c':
+ case 'C':
+ case 'D':
+ case 'd':
+ case 'f':
+ case 'F':
+ if (do_traditional || ! inhex)
+ goto done;
+ /* fall through */
+#endif
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ break;
+ default:
+ done:
+ gotnumber = TRUE;
+ }
+ if (gotnumber)
+ break;
+ c = nextc();
+ }
+ if (c != EOF)
+ pushback();
+ else if (do_lint && ! eof_warned) {
+ warning("source file does not end in newline");
+ eof_warned = TRUE;
+ }
+ tokadd('\0');
+#ifdef BITOPS
+ if (! do_traditional && isnondecimal(tokstart))
+ yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
+ else
+#endif /* BITOPS */
+ yylval.nodeval = make_number(atof(tokstart));
+ yylval.nodeval->flags |= PERM;
+ return lasttok = YNUMBER;
+
+ case '&':
+ if ((c = nextc()) == '&') {
+ yylval.nodetypeval = Node_and;
+ allow_newline();
+ want_assign = FALSE;
+ return lasttok = LEX_AND;
+ }
+ pushback();
+ return lasttok = '&';
+
+ case '|':
+ if ((c = nextc()) == '|') {
+ yylval.nodetypeval = Node_or;
+ allow_newline();
+ want_assign = FALSE;
+ return lasttok = LEX_OR;
+ }
+ pushback();
+ return lasttok = '|';
+ }
+
+ /* anything left must start an identifier or keyword */
+ if (c != '_' && ! isalpha(c)) {
+ yyerror("Invalid char '%c' in expression\n", c);
+ exit(1);
+ }
+
+ /* it's some type of name-type-thing. Find its length. */
+ tok = tokstart;
+ while (is_identchar(c)) {
+ tokadd(c);
+ c = nextc();
+ }
+ tokadd('\0');
+ /* private copy of the name, including the '\0'; freed if it is a keyword */
+ emalloc(tokkey, char *, tok - tokstart, "yylex");
+ memcpy(tokkey, tokstart, tok - tokstart);
+ if (c != EOF)
+ pushback();
+ else if (do_lint && ! eof_warned) {
+ warning("source file does not end in newline");
+ eof_warned = TRUE;
+ }
+
+ /* See if it is a special token. */
+ /* binary search of tokentab, comparing first characters before strcmp */
+ low = 0;
+ high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
+ while (low <= high) {
+ int i;
+
+ mid = (low + high) / 2;
+ c = *tokstart - tokentab[mid].operator[0];
+ i = c ? c : strcmp(tokstart, tokentab[mid].operator);
+
+ if (i < 0) /* token < mid */
+ high = mid - 1;
+ else if (i > 0) /* token > mid */
+ low = mid + 1;
+ else {
+ if (do_lint) {
+ if (tokentab[mid].flags & GAWKX)
+ warning("%s() is a gawk extension",
+ tokentab[mid].operator);
+ if (tokentab[mid].flags & RESX)
+ warning("%s() is a Bell Labs extension",
+ tokentab[mid].operator);
+ if (tokentab[mid].flags & NOT_POSIX)
+ warning("POSIX does not allow %s",
+ tokentab[mid].operator);
+ }
+ if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
+ warning("%s is not supported in old awk",
+ tokentab[mid].operator);
+ /* word is disabled in this mode: leave the loop and treat it as a name */
+ if ((do_traditional && (tokentab[mid].flags & GAWKX))
+ || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
+ break;
+ if (tokentab[mid].class == LEX_BUILTIN
+ || tokentab[mid].class == LEX_LENGTH
+ )
+ yylval.lval = mid;
+ else
+ yylval.nodetypeval = tokentab[mid].value;
+
+ free(tokkey);
+ return lasttok = tokentab[mid].class;
+ }
+ }
+
+ /* an ordinary name: FUNC_CALL when `(' follows immediately, else NAME */
+ yylval.sval = tokkey;
+ if (*lexptr == '(')
+ return lasttok = FUNC_CALL;
+ else {
+ want_assign = TRUE;
+ return lasttok = NAME;
+ }
+}
+
+/*
+ * node_common --- allocate a node of type op and fill in the fields
+ * shared by all nodes: flags, source line and source file
+ */
+
+static NODE *
+node_common(op)
+NODETYPE op;
+{
+	register NODE *np;
+	int at_newline;
+
+	getnode(np);
+	np->type = op;
+	np->flags = MALLOC;
+
+	/* if the lookahead is a newline, sourceline is one too high */
+	at_newline = (lexeme && *lexeme == '\n');
+	if (at_newline)
+		np->source_line = sourceline - 1;
+	else
+		np->source_line = sourceline;
+	np->source_file = source;
+	return np;
+}
+
+/* node --- allocate a node of type op with the given left and right subnodes */
+
+NODE *
+node(left, op, right)
+NODE *left, *right;
+NODETYPE op;
+{
+	register NODE *np = node_common(op);
+
+	np->lnode = left;
+	np->rnode = right;
+	return np;
+}
+
+/*
+ * snode --- allocate a node with defined subnode and proc for builtin
+ * functions. Checks for arg. count and supplies defaults where
+ * possible.
+ *
+ * subn is the argument list (linked through rnode), op is the type of
+ * the new node and idx is the index of the builtin in tokentab.
+ * A bad argument count is reported with fatal().
+ */
+
+static NODE *
+snode(subn, op, idx)
+NODETYPE op;
+int idx;
+NODE *subn;
+{
+ register NODE *r;
+ register NODE *n;
+ int nexp = 0;
+ int args_allowed;
+
+ r = node_common(op);
+
+ /* traverse expression list to see how many args. given */
+ /* counting stops once more than three have been seen */
+ for (n = subn; n != NULL; n = n->rnode) {
+ nexp++;
+ if (nexp > 3)
+ break;
+ }
+
+ /* check against how many args. are allowed for this builtin */
+ args_allowed = tokentab[idx].flags & ARGS;
+ if (args_allowed && (args_allowed & A(nexp)) == 0)
+ fatal("%s() cannot have %d argument%c",
+ tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's');
+
+ r->proc = tokentab[idx].ptr;
+
+ /* special case processing for a few builtins */
+ /*
+ * FIXME: go through these to make sure that everything done
+ * here is really right. Move anything that's not into
+ * the corresponding routine.
+ */
+ if (nexp == 0 && r->proc == do_length) {
+ /* length with no argument: build a field spec on the constant 0 */
+ subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL);
+ } else if (r->proc == do_match) {
+ /* second argument is wrapped as a regexp unless it already is one */
+ if (subn->rnode->lnode->type != Node_regex)
+ subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
+ } else if (r->proc == do_sub || r->proc == do_gsub) {
+ /* first argument becomes a regexp; a missing third one defaults to field 0 */
+ if (subn->lnode->type != Node_regex)
+ subn->lnode = mk_rexp(subn->lnode);
+ if (nexp == 2)
+ append_right(subn, node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL));
+ else if (subn->rnode->rnode->lnode->type == Node_val) {
+ if (do_lint)
+ warning("string literal as last arg of substitute");
+ } else if (! isassignable(subn->rnode->rnode->lnode))
+ yyerror("%s third parameter is not a changeable object",
+ r->proc == do_sub ? "sub" : "gsub");
+ } else if (r->proc == do_gensub) {
+ /* first argument becomes a regexp; with three args a field 0 spec is appended */
+ if (subn->lnode->type != Node_regex)
+ subn->lnode = mk_rexp(subn->lnode);
+ if (nexp == 3)
+ append_right(subn, node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL));
+ } else if (r->proc == do_split) {
+ /* with two args FS_node is appended as separator and flagged FS_DFLT */
+ if (nexp == 2)
+ append_right(subn,
+ node(FS_node, Node_expression_list, (NODE *) NULL));
+ n = subn->rnode->rnode->lnode;
+ if (n->type != Node_regex)
+ subn->rnode->rnode->lnode = mk_rexp(n);
+ if (nexp == 2)
+ subn->rnode->rnode->lnode->re_flags |= FS_DFLT;
+ }
+
+ r->subnode = subn;
+ return r;
+}
+
+/*
+ * mkrangenode:
+ * Build a Node_line_range node whose condpair is cpair. The trigger
+ * word is cleared explicitly here, so nobody has to rely on
+ * 'node( foo, Node_line_range, 0)' happening to initialize 'triggered'.
+ * In other respects this is like node().
+ */
+
+static NODE *
+mkrangenode(cpair)
+NODE *cpair;
+{
+	register NODE *range;
+
+	getnode(range);
+	range->type = Node_line_range;
+	range->condpair = cpair;
+	range->triggered = FALSE;	/* range starts out inactive */
+	return range;
+}
+
+/*
+ * make_for_loop --- package the three parts of a for loop header.
+ * The parts are stored in a freshly allocated FOR_LOOP_HEADER which is
+ * hung off a Node_illegal carrier node; the carrier is returned.
+ */
+
+static NODE *
+make_for_loop(init, cond, incr)
+NODE *init, *cond, *incr;
+{
+	register FOR_LOOP_HEADER *hdr;
+	NODE *carrier;
+
+	emalloc(hdr, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
+	getnode(carrier);
+	carrier->type = Node_illegal;
+
+	hdr->init = init;
+	hdr->cond = cond;
+	hdr->incr = incr;
+
+	carrier->sub.nodep.r.hd = hdr;
+	return carrier;
+}
+
+/*
+ * dup_parms --- return TRUE if there are duplicate parameters.
+ * Each duplicate found is reported with error(). TRUE is also returned
+ * when an earlier error left the function or its parameter list unusable.
+ */
+
+static int
+dup_parms(func)
+NODE *func;
+{
+	register NODE *np;
+	char *fname, **names;
+	int count, filled, cur, prev;
+	int found = 0;
+	NODE *params;
+
+	if (func == NULL)	/* error earlier */
+		return TRUE;
+
+	fname = func->param;
+	count = func->param_cnt;
+	params = func->rnode;
+
+	if (count == 0)		/* no args, no problem */
+		return FALSE;
+	if (params == NULL)	/* error earlier */
+		return TRUE;
+
+	emalloc(names, char **, count * sizeof(char *), "dup_parms");
+
+	/* collect the parameter names into a flat array */
+	filled = 0;
+	np = params;
+	while (np != NULL) {
+		if (np->param == NULL) {	/* error earlier, give up, go home */
+			free(names);
+			return TRUE;
+		}
+		names[filled++] = np->param;
+		np = np->rnode;
+	}
+
+	/* compare every parameter with each one that precedes it */
+	for (cur = 1; cur < count; cur++) {
+		for (prev = 0; prev < cur; prev++) {
+			if (strcmp(names[cur], names[prev]) != 0)
+				continue;
+			found++;
+			error(
+	"function `%s': parameter #%d, `%s', duplicates parameter #%d",
+				fname, cur+1, names[prev], prev+1);
+		}
+	}
+
+	free(names);
+	return (found > 0 ? TRUE : FALSE);
+}
+
+/*
+ * install:
+ * Put name into the symbol table with the given value, whether or not an
+ * entry of that name exists already; the new entry goes at the front of
+ * its hash chain. A caller that cares about redefinition has to check
+ * for it first. Returns value.
+ */
+
+NODE *
+install(name, value)
+char *name;
+NODE *value;
+{
+	register NODE *entry;
+	register size_t namelen;
+	register int slot;
+
+	namelen = strlen(name);
+	slot = hash(name, namelen, (unsigned long) HASHSIZE);
+
+	getnode(entry);
+	entry->type = Node_hashnode;
+
+	/* link in at the head of the chain */
+	entry->hnext = variables[slot];
+	variables[slot] = entry;
+
+	entry->hlength = namelen;
+	entry->hvalue = value;
+	entry->hname = name;
+	entry->hvalue->vname = name;
+	return entry->hvalue;
+}
+
+/*
+ * lookup --- return the value of the first entry for name on its hash
+ * chain (install() adds at the front, so that is the most recent one),
+ * or NULL if the name is not in the symbol table
+ */
+
+NODE *
+lookup(name)
+const char *name;
+{
+	register NODE *entry;
+	register size_t namelen;
+
+	namelen = strlen(name);
+	entry = variables[hash(name, namelen, (unsigned long) HASHSIZE)];
+	while (entry != NULL) {
+		if (entry->hlength == namelen && STREQN(entry->hname, name, namelen))
+			return entry->hvalue;
+		entry = entry->hnext;
+	}
+
+	return NULL;
+}
+
+/*
+ * append_right:
+ * Hang new on the end of the rnode chain of LIST and return LIST.
+ * Walking to the end every time is n^2 overall, so the head and tail of
+ * the list used on the previous call are remembered; appending to the
+ * same list again goes straight to the saved tail.
+ */
+
+static NODE *
+append_right(list, new)
+NODE *list, *new;
+{
+	register NODE *tail;
+	static NODE *savefront = NULL, *savetail = NULL;
+
+	if (savefront == list) {
+		/* same list as last time: reuse the remembered tail */
+		tail = savetail;
+	} else {
+		savefront = list;
+		for (tail = list; tail->rnode != NULL; tail = tail->rnode)
+			continue;
+	}
+	tail->rnode = new;
+	savetail = new;
+	return list;
+}
+
+/*
+ * func_install:
+ * Remove the function's parameters and its own name node from the symbol
+ * table, then install the name with a Node_func node built from params
+ * and def. It is a fatal error if the name is still found after that.
+ * The definition is also recorded with func_use().
+ */
+
+static void
+func_install(params, def)
+NODE *params;
+NODE *def;
+{
+	char *fname;
+
+	pop_params(params->rnode);
+	pop_var(params, FALSE);
+
+	fname = params->param;
+	if (lookup(fname) == NULL)
+		(void) install(fname, node(params, Node_func, def));
+	else
+		fatal("function name `%s' previously defined", fname);
+
+	func_use(params->param, FUNC_DEFINE);
+}
+
+/*
+ * pop_var --- remove a variable from the symbol table.
+ * The first entry on the hash chain whose name matches np->param is
+ * unlinked and its hash node released; if freeit is true the name
+ * string np->param is freed as well. Nothing happens if there is no
+ * matching entry.
+ */
+
+static void
+pop_var(np, freeit)
+NODE *np;
+int freeit;
+{
+	register NODE *entry, **linkp;
+	register size_t namelen;
+	char *name;
+
+	name = np->param;
+	namelen = strlen(name);
+
+	/* linkp always addresses the pointer that leads to entry */
+	linkp = &(variables[hash(name, namelen, (unsigned long) HASHSIZE)]);
+	while ((entry = *linkp) != NULL) {
+		if (namelen == entry->hlength && STREQN(entry->hname, name, namelen)) {
+			*linkp = entry->hnext;
+			freenode(entry);
+			if (freeit)
+				free(np->param);
+			return;
+		}
+		linkp = &(entry->hnext);
+	}
+}
+
+/* pop_params --- remove list of function parameters from symbol table */
+
+/*
+ * The recursion reaches the end of the list first, so parameters are
+ * popped last to first. That order avoids reading freed memory if there
+ * were duplicated parameters.
+ */
+static void
+pop_params(params)
+NODE *params;
+{
+	if (params != NULL) {
+		pop_params(params->rnode);
+		pop_var(params, TRUE);
+	}
+}
+
+/*
+ * make_param --- make NAME into a function parameter: build a
+ * Node_param_list node numbered from param_counter and install it in the
+ * symbol table under NAME
+ */
+
+static NODE *
+make_param(name)
+char *name;
+{
+	NODE *parm;
+
+	getnode(parm);
+	parm->type = Node_param_list;
+	parm->rnode = NULL;
+	parm->param = name;
+	parm->param_cnt = param_counter++;
+
+	return install(name, parm);
+}
+
+/*
+ * Per-function bookkeeping maintained by func_use() and reported on by
+ * check_funcs(). ftable is a hash table of chains linked through `next'.
+ */
+static struct fdesc {
+ char *name; /* function name (separately allocated copy) */
+ short used; /* bumped by func_use() for anything but FUNC_DEFINE */
+ short defined; /* bumped by func_use() for FUNC_DEFINE */
+ struct fdesc *next; /* next entry on the same hash chain */
+} *ftable[HASHSIZE];
+
+/*
+ * func_use --- track uses and definitions of functions.
+ * Finds the ftable entry for name, creating a zeroed one at the front of
+ * its chain if there is none, then bumps `defined' when how is
+ * FUNC_DEFINE and `used' otherwise.
+ */
+
+static void
+func_use(name, how)
+char *name;
+enum defref how;
+{
+	struct fdesc *fp;
+	int len;
+	int ind;
+
+	len = strlen(name);
+	ind = hash(name, len, HASHSIZE);
+
+	fp = ftable[ind];
+	while (fp != NULL && strcmp(fp->name, name) != 0)
+		fp = fp->next;
+
+	if (fp == NULL) {
+		/* not in the table yet, allocate a new one */
+		emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
+		memset(fp, '\0', sizeof(struct fdesc));
+		emalloc(fp->name, char *, len + 1, "func_use");
+		strcpy(fp->name, name);
+		fp->next = ftable[ind];
+		ftable[ind] = fp;
+	}
+
+	if (how == FUNC_DEFINE)
+		fp->defined++;
+	else
+		fp->used++;
+}
+
+/*
+ * check_funcs --- verify functions that are called but not defined.
+ *
+ * With REALLYMEAN an undefined function is an error and bumps errcount;
+ * otherwise it is only a lint warning. Under lint, functions that are
+ * defined but never called are warned about as well. Afterwards the
+ * whole usage table is released and every chain head is reset, so
+ * ftable never points at freed memory.
+ */
+
+static void
+check_funcs()
+{
+	struct fdesc *fp, *next;
+	int i;
+
+	for (i = 0; i < HASHSIZE; i++) {
+		for (fp = ftable[i]; fp != NULL; fp = fp->next) {
+#ifdef REALLYMEAN
+			/* making this the default breaks old code. sigh. */
+			if (fp->defined == 0) {
+				error(
+		"function `%s' called but never defined", fp->name);
+				errcount++;
+			}
+#else
+			if (do_lint && fp->defined == 0)
+				warning(
+		"function `%s' called but never defined", fp->name);
+#endif
+			if (do_lint && fp->used == 0) {
+				warning("function `%s' defined but never called",
+					fp->name);
+			}
+		}
+	}
+
+	/* now let's free all the memory */
+	for (i = 0; i < HASHSIZE; i++) {
+		for (fp = ftable[i]; fp != NULL; fp = next) {
+			next = fp->next;
+			free(fp->name);
+			free(fp);
+		}
+		/* the chain is gone; do not leave a dangling head behind */
+		ftable[i] = NULL;
+	}
+}
+
+/*
+ * param_sanity --- look for parameters that are regexp constants and
+ * warn about each one, giving its position (counted from 1) in the list
+ */
+
+static void
+param_sanity(arglist)
+NODE *arglist;
+{
+	NODE *argp;
+	int pos = 1;
+
+	argp = arglist;
+	while (argp != NULL) {
+		if (argp->lnode->type == Node_regex)
+			warning("regexp constant for parameter #%d yields boolean value", pos);
+		argp = argp->rnode;
+		pos++;
+	}
+}
+
+/*
+ * variable --- make sure NAME is in the symbol table and return its node.
+ * A name not yet present is installed with a new node of the given type
+ * whose left side is Nnull_string. If the name is already present and
+ * can_free is true, NAME itself is freed. The first request for
+ * "ENVIRON" calls load_environ() before the lookup.
+ */
+
+NODE *
+variable(name, can_free, type)
+char *name;
+int can_free;
+NODETYPE type;
+{
+	register NODE *found;
+	static int env_loaded = FALSE;
+
+	if (! env_loaded && STREQ(name, "ENVIRON")) {
+		load_environ();
+		env_loaded = TRUE;
+	}
+
+	found = lookup(name);
+	if (found == NULL)
+		return install(name, node(Nnull_string, type, (NODE *) NULL));
+
+	if (can_free)
+		free(name);
+	return found;
+}
+
+/*
+ * mk_rexp --- make a regular expression constant.
+ * A node that is already a Node_regex is returned unchanged; anything
+ * else is wrapped in a new Node_regex node that refers to it as re_exp.
+ */
+
+static NODE *
+mk_rexp(exp)
+NODE *exp;
+{
+	NODE *re;
+
+	if (exp->type != Node_regex) {
+		getnode(re);
+		re->type = Node_regex;
+		re->re_exp = exp;
+		re->re_text = NULL;
+		re->re_reg = NULL;
+		re->re_flags = 0;
+		re->re_cnt = 1;
+		return re;
+	}
+
+	return exp;
+}
+
+/* isnoeffect --- when used as a statement, has no side effects */
+
+/*
+ * Only the type of the top node is looked at. A fully general check
+ * would walk the parse tree recursively and make sure that none of the
+ * subexpressions has an effect either. Instead, the warning printed up
+ * above in the grammar is simply worded more weakly.
+ */
+
+static int
+isnoeffect(type)
+NODETYPE type;
+{
+	int no_effect = FALSE;
+
+	switch (type) {
+	case Node_times:
+	case Node_quotient:
+	case Node_mod:
+	case Node_plus:
+	case Node_minus:
+	case Node_subscript:
+	case Node_concat:
+	case Node_exp:
+	case Node_unary_minus:
+	case Node_field_spec:
+	case Node_and:
+	case Node_or:
+	case Node_equal:
+	case Node_notequal:
+	case Node_less:
+	case Node_greater:
+	case Node_leq:
+	case Node_geq:
+	case Node_match:
+	case Node_nomatch:
+	case Node_not:
+	case Node_val:
+	case Node_in_array:
+	case Node_NF:
+	case Node_NR:
+	case Node_FNR:
+	case Node_FS:
+	case Node_RS:
+	case Node_FIELDWIDTHS:
+	case Node_IGNORECASE:
+	case Node_OFS:
+	case Node_ORS:
+	case Node_OFMT:
+	case Node_CONVFMT:
+		no_effect = TRUE;
+		break;
+	default:
+		break;	/* keeps gcc -Wall happy */
+	}
+
+	return no_effect;
+}
+
+/*
+ * isassignable --- can this node be assigned to?
+ * True for plain variables, the listed builtin variables, field
+ * references and array elements. A parameter qualifies unless its FUNC
+ * flag is set, i.e. unless it is a function name.
+ */
+
+static int
+isassignable(n)
+register NODE *n;
+{
+	int ok = FALSE;
+
+	switch (n->type) {
+	case Node_var:
+	case Node_FIELDWIDTHS:
+	case Node_RS:
+	case Node_FS:
+	case Node_FNR:
+	case Node_NR:
+	case Node_NF:
+	case Node_IGNORECASE:
+	case Node_OFMT:
+	case Node_CONVFMT:
+	case Node_ORS:
+	case Node_OFS:
+	case Node_field_spec:
+	case Node_subscript:
+		ok = TRUE;
+		break;
+	case Node_param_list:
+		ok = ((n->flags & FUNC) == 0);	/* ok if not func name */
+		break;
+	default:
+		break;	/* keeps gcc -Wall happy */
+	}
+	return ok;
+}
diff --git a/contrib/awk/builtin.c b/contrib/awk/builtin.c
new file mode 100644
index 0000000..0686041
--- /dev/null
+++ b/contrib/awk/builtin.c
@@ -0,0 +1,2048 @@
+/*
+ * builtin.c - Builtin functions and various utility procedures
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+#include "awk.h"
+#include <assert.h>
+#undef HUGE
+#undef CHARBITS
+#undef INTBITS
+#include <math.h>
+#include "random.h"
+
+/* can declare these, since we always use the random shipped with gawk */
+extern char *initstate P((unsigned seed, char *state, int n));
+extern char *setstate P((char *state));
+extern long random P((void));
+extern void srandom P((unsigned int seed));
+
+/* objects declared here only; their definitions are not in this part of the file */
+extern NODE **fields_arr;
+extern int output_is_tty;
+
+/* forward declarations for routines in this file */
+static NODE *sub_common P((NODE *tree, int how_many, int backdigs));
+NODE *format_tree P((const char *, int, NODE *));
+
+#ifdef _CRAY
+/* Work around a problem in conversion of doubles to exact integers. */
+#include <float.h>
+#define Floor(n) floor((n) * (1.0 + DBL_EPSILON))
+#define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON))
+
+/* Force the standard C compiler to use the library math functions. */
+extern double exp(double);
+double (*Exp)() = exp;
+#define exp(x) (*Exp)(x)
+extern double log(double);
+double (*Log)() = log;
+#define log(x) (*Log)(x)
+#else
+/* everywhere else Floor and Ceil are the plain library functions */
+#define Floor(n) floor(n)
+#define Ceil(n) ceil(n)
+#endif
+
+/* NOTE(review): presumably the precision used for %g when none is given --- confirm at the use sites */
+#define DEFAULT_G_PRECISION 6
+
+#ifdef GFMT_WORKAROUND
+/* semi-temporary hack, mostly to gracefully handle VMS */
+static void sgfmt P((char *buf, const char *format, int alt,
+ int fwidth, int precision, double value));
+#endif /* GFMT_WORKAROUND */
+
+/*
+ * Since we supply the version of random(), we know what
+ * value to use here.
+ */
+#define GAWK_RANDOM_MAX 0x7fffffffL
+
+static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
+ const char *from, struct redirect *rp, int flush));
+
+/*
+ * efwrite --- like fwrite, but with error checking.
+ * A short write is fatal. When flush is set and the destination is
+ * either stdout on a tty or a redirection marked RED_NOBUF, the stream
+ * is flushed too and a stream error after the flush is fatal as well.
+ * `from' names the caller in the error message.
+ */
+
+static void
+efwrite(ptr, size, count, fp, from, rp, flush)
+const void *ptr;
+size_t size, count;
+FILE *fp;
+const char *from;
+struct redirect *rp;
+int flush;
+{
+	int failed;
+
+	errno = 0;
+	failed = (fwrite(ptr, size, count, fp) != count);
+
+	if (! failed && flush
+	    && ((fp == stdout && output_is_tty)
+	     || (rp && (rp->flag & RED_NOBUF)))) {
+		fflush(fp);
+		failed = (ferror(fp) != 0);
+	}
+
+	if (failed)
+		fatal("%s to \"%s\" failed (%s)", from,
+			rp ? rp->value : "standard output",
+			errno ? strerror(errno) : "reason unknown");
+}
+
+/*
+ * do_exp --- exponential function.
+ * Evaluates the first argument as a number and returns exp() of it,
+ * warning when the library reports ERANGE.
+ */
+
+NODE *
+do_exp(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	double x, result;
+
+	argnode = tree_eval(tree->lnode);
+	x = force_number(argnode);
+	free_temp(argnode);
+
+	errno = 0;	/* so that a range error from exp() can be detected */
+	result = exp(x);
+	if (errno == ERANGE)
+		warning("exp argument %g is out of range", x);
+	return tmp_number((AWKNUM) result);
+}
+
+/* stdfile --- return fp for a standard file */
+
+/*
+ * This is what makes `fflush("/dev/stdout")' work: "/dev/stderr" and
+ * "/dev/stdout" map to stderr and stdout, any other name gives NULL.
+ * The other files will be available via getredirect().
+ * /dev/stdin is not included, since fflush is only for output.
+ */
+
+static FILE *
+stdfile(name, len)
+char *name;
+size_t len;
+{
+	/* both recognized names are exactly 11 characters long */
+	if (len != 11)
+		return NULL;
+
+	if (STREQN(name, "/dev/stderr", 11))
+		return stderr;
+	if (STREQN(name, "/dev/stdout", 11))
+		return stdout;
+
+	return NULL;
+}
+
+/*
+ * do_fflush --- flush output, either named file or pipe or everything.
+ *
+ * No argument flushes stdout; an empty string flushes everything via
+ * flush_io(). Otherwise the name is looked up as an open redirection,
+ * then as /dev/stdout or /dev/stderr. The result is the status of the
+ * flush, or 1 when nothing could be flushed.
+ */
+
+NODE *
+do_fflush(tree)
+NODE *tree;
+{
+	struct redirect *rp;
+	NODE *arg;
+	FILE *fp;
+	int status = 0;
+	char *file;
+
+	/* fflush() --- flush stdout */
+	if (tree == NULL) {
+		status = fflush(stdout);
+		return tmp_number((AWKNUM) status);
+	}
+
+	arg = tree_eval(tree->lnode);
+	arg = force_string(arg);
+	file = arg->stptr;
+
+	/* fflush("") --- flush all */
+	if (arg->stlen == 0) {
+		status = flush_io();
+		free_temp(arg);
+		return tmp_number((AWKNUM) status);
+	}
+
+	rp = getredirect(arg->stptr, arg->stlen);
+	status = 1;	/* assume failure until something is flushed */
+	if (rp == NULL) {
+		fp = stdfile(arg->stptr, arg->stlen);
+		if (fp != NULL)
+			status = fflush(fp);
+		else
+			warning("fflush: `%s' is not an open file or pipe", file);
+	} else if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
+		/* warned about unconditionally, not only under lint */
+		warning(
+	"fflush: cannot flush: %s `%s' opened for reading, not writing",
+			(rp->flag & RED_PIPE) ? "pipe" : "file",
+			file);
+	} else {
+		fp = rp->fp;
+		if (fp != NULL)
+			status = fflush(fp);
+	}
+
+	free_temp(arg);
+	return tmp_number((AWKNUM) status);
+}
+
+/*
+ * do_index --- find index of a string.
+ *
+ * Evaluates the two arguments as strings and returns the 1-based
+ * position of the first occurrence of the second within the first, or 0
+ * if there is none. When IGNORECASE is set the comparison goes through
+ * casetable and strncasecmp().
+ *
+ * casetable is indexed through unsigned char: a plain char may be
+ * signed, and a byte with the high bit set would otherwise produce a
+ * negative subscript.
+ */
+
+NODE *
+do_index(tree)
+NODE *tree;
+{
+	NODE *s1, *s2;
+	register char *p1, *p2;
+	register size_t l1, l2;
+	long ret;
+
+
+	s1 = tree_eval(tree->lnode);
+	s2 = tree_eval(tree->rnode->lnode);
+	force_string(s1);
+	force_string(s2);
+	p1 = s1->stptr;
+	p2 = s2->stptr;
+	l1 = s1->stlen;
+	l2 = s2->stlen;
+	ret = 0;
+
+	/* IGNORECASE will already be false if posix */
+	if (IGNORECASE) {
+		while (l1 > 0) {
+			if (l2 > l1)	/* what is left is too short to match */
+				break;
+			if (casetable[(unsigned char) *p1] == casetable[(unsigned char) *p2]
+			    && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
+				ret = 1 + s1->stlen - l1;
+				break;
+			}
+			l1--;
+			p1++;
+		}
+	} else {
+		while (l1 > 0) {
+			if (l2 > l1)	/* what is left is too short to match */
+				break;
+			if (*p1 == *p2
+			    && (l2 == 1 || STREQN(p1, p2, l2))) {
+				ret = 1 + s1->stlen - l1;
+				break;
+			}
+			l1--;
+			p1++;
+		}
+	}
+	free_temp(s1);
+	free_temp(s2);
+	return tmp_number((AWKNUM) ret);
+}
+
+/*
+ * double_to_int --- drop the fractional part of a double, rounding
+ * toward zero: Floor for values >= 0, Ceil otherwise. Used several places.
+ */
+
+double
+double_to_int(d)
+double d;
+{
+	return (d >= 0) ? Floor(d) : Ceil(d);
+}
+
+/*
+ * do_int --- the awk int() function: evaluate the argument as a number
+ * and return it with the fraction removed by double_to_int()
+ */
+
+NODE *
+do_int(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	double val;
+
+	argnode = tree_eval(tree->lnode);
+	val = force_number(argnode);
+	val = double_to_int(val);
+	free_temp(argnode);
+
+	return tmp_number((AWKNUM) val);
+}
+
+/*
+ * do_length --- length of a string or $0: evaluate the argument, force
+ * it to a string and return that string's length
+ */
+
+NODE *
+do_length(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	size_t nchars;
+
+	argnode = tree_eval(tree->lnode);
+	nchars = force_string(argnode)->stlen;
+	free_temp(argnode);
+
+	return tmp_number((AWKNUM) nchars);
+}
+
+/*
+ * do_log --- the log function.
+ * A negative argument draws a warning; log() is applied to it all the
+ * same and whatever it yields is returned.
+ */
+
+NODE *
+do_log(tree)
+NODE *tree;
+{
+	NODE *argnode;
+	double x, result;
+
+	argnode = tree_eval(tree->lnode);
+	x = (double) force_number(argnode);
+	if (x < 0.0)
+		warning("log called with negative argument %g", x);
+	result = log(x);
+	free_temp(argnode);
+
+	return tmp_number((AWKNUM) result);
+}
+
+/*
+ * format_tree() formats the nodes of a tree, starting with a left node,
+ * according to fmt_string, which provides a format like those of the
+ * printf family in the C library. It returns a string node whose value
+ * is the formatted string. Called by the sprintf function.
+ *
+ * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
+ * for taming this beast and making it compatible with ANSI C.
+ */
+
+NODE *
+format_tree(fmt_string, n0, carg)
+const char *fmt_string;
+int n0;
+register NODE *carg;
+{
+/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
+/* difference of pointers should be of ptrdiff_t type, but let us be kind */
+#define bchunk(s, l) if (l) { \
+ while ((l) > ofre) { \
+ long olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ memcpy(obufout, s, (size_t) (l)); \
+ obufout += (l); \
+ ofre -= (l); \
+}
+
+/* copy one byte from 's' to 'obufout' checking for space in the process */
+#define bchunk_one(s) { \
+ if (ofre <= 0) { \
+ long olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ *obufout++ = *s; \
+ --ofre; \
+}
+
+/* Is there space for something L big in the buffer? */
+#define chksize(l) if ((l) > ofre) { \
+ long olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ obufout = obuf + olen; \
+ ofre += osiz; \
+ osiz *= 2; \
+}
+
+/*
+ * Get the next arg to be formatted. If we've run out of args,
+ * return "" (Null string)
+ */
+#define parse_next_arg() { \
+ if (carg == NULL) { \
+ toofew = TRUE; \
+ break; \
+ } else { \
+ arg = tree_eval(carg->lnode); \
+ carg = carg->rnode; \
+ } \
+}
+
+ NODE *r;
+ int toofew = FALSE;
+ char *obuf, *obufout;
+ size_t osiz, ofre;
+ char *chbuf;
+ const char *s0, *s1;
+ int cs1;
+ NODE *arg;
+ long fw, prec;
+ int lj, alt, big, bigbig, small, have_prec, need_format;
+ long *cur = NULL;
+#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
+ long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */
+#endif
+ unsigned long uval;
+ int sgn;
+ int base = 0;
+ char cpbuf[30]; /* if we have numbers bigger than 30 */
+ char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */
+ char *cp;
+ char *fill;
+ double tmpval;
+ char signchar = FALSE;
+ size_t len;
+ static char sp[] = " ";
+ static char zero_string[] = "0";
+ static char lchbuf[] = "0123456789abcdef";
+ static char Uchbuf[] = "0123456789ABCDEF";
+
+#define INITIAL_OUT_SIZE 512
+ emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
+ obufout = obuf;
+ osiz = INITIAL_OUT_SIZE;
+ ofre = osiz - 1;
+
+ need_format = FALSE;
+
+ s0 = s1 = fmt_string;
+ while (n0-- > 0) {
+ if (*s1 != '%') {
+ s1++;
+ continue;
+ }
+ need_format = TRUE;
+ bchunk(s0, s1 - s0);
+ s0 = s1;
+ cur = &fw;
+ fw = 0;
+ prec = 0;
+ have_prec = FALSE;
+ signchar = FALSE;
+ lj = alt = big = bigbig = small = FALSE;
+ fill = sp;
+ cp = cend;
+ chbuf = lchbuf;
+ s1++;
+
+retry:
+ if (n0-- <= 0) /* ran out early! */
+ break;
+
+ switch (cs1 = *s1++) {
+ case (-1): /* dummy case to allow for checking */
+check_pos:
+ if (cur != &fw)
+ break; /* reject as a valid format */
+ goto retry;
+ case '%':
+ need_format = FALSE;
+ bchunk_one("%");
+ s0 = s1;
+ break;
+
+ case '0':
+ if (lj)
+ goto retry;
+ if (cur == &fw)
+ fill = zero_string;
+ /* FALL through */
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (cur == NULL)
+ break;
+ if (prec >= 0)
+ *cur = cs1 - '0';
+ /*
+ * with a negative precision *cur is already set
+ * to -1, so it will remain negative, but we have
+ * to "eat" precision digits in any case
+ */
+ while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
+ --n0;
+ *cur = *cur * 10 + *s1++ - '0';
+ }
+ if (prec < 0) /* negative precision is discarded */
+ have_prec = FALSE;
+ if (cur == &prec)
+ cur = NULL;
+ if (n0 == 0) /* badly formatted control string */
+ continue;
+ goto retry;
+ case '*':
+ if (cur == NULL)
+ break;
+ parse_next_arg();
+ *cur = force_number(arg);
+ free_temp(arg);
+ if (*cur < 0 && cur == &fw) {
+ *cur = -*cur;
+ lj++;
+ }
+ if (cur == &prec) {
+ if (*cur >= 0)
+ have_prec = TRUE;
+ else
+ have_prec = FALSE;
+ cur = NULL;
+ }
+ goto retry;
+ case ' ': /* print ' ' or '-' */
+ /* 'space' flag is ignored */
+ /* if '+' already present */
+ if (signchar != FALSE)
+ goto check_pos;
+ /* FALL THROUGH */
+ case '+': /* print '+' or '-' */
+ signchar = cs1;
+ goto check_pos;
+ case '-':
+ if (prec < 0)
+ break;
+ if (cur == &prec) {
+ prec = -1;
+ goto retry;
+ }
+ fill = sp; /* if left justified then other */
+ lj++; /* filling is ignored */
+ goto check_pos;
+ case '.':
+ if (cur != &fw)
+ break;
+ cur = &prec;
+ have_prec = TRUE;
+ goto retry;
+ case '#':
+ alt = TRUE;
+ goto check_pos;
+ case 'l':
+ if (big)
+ break;
+ else {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warning("`l' is meaningless in awk formats; ignored");
+ warned = TRUE;
+ }
+ if (do_posix)
+ fatal("'l' is not permitted in POSIX awk formats");
+ }
+ big = TRUE;
+ goto retry;
+ case 'L':
+ if (bigbig)
+ break;
+ else {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warning("`L' is meaningless in awk formats; ignored");
+ warned = TRUE;
+ }
+ if (do_posix)
+ fatal("'L' is not permitted in POSIX awk formats");
+ }
+ bigbig = TRUE;
+ goto retry;
+ case 'h':
+ if (small)
+ break;
+ else {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warning("`h' is meaningless in awk formats; ignored");
+ warned = TRUE;
+ }
+ if (do_posix)
+ fatal("'h' is not permitted in POSIX awk formats");
+ }
+ small = TRUE;
+ goto retry;
+ case 'c':
+ need_format = FALSE;
+ parse_next_arg();
+ /* user input that looks numeric is numeric */
+ if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM)
+ (void) force_number(arg);
+ if (arg->flags & NUMBER) {
+#ifdef sun386
+ tmp_uval = arg->numbr;
+ uval = (unsigned long) tmp_uval;
+#else
+ uval = (unsigned long) arg->numbr;
+#endif
+ cpbuf[0] = uval;
+ prec = 1;
+ cp = cpbuf;
+ goto pr_tail;
+ }
+ if (have_prec == FALSE)
+ prec = 1;
+ else if (prec > arg->stlen)
+ prec = arg->stlen;
+ cp = arg->stptr;
+ goto pr_tail;
+ case 's':
+ need_format = FALSE;
+ parse_next_arg();
+ arg = force_string(arg);
+ if (! have_prec || prec > arg->stlen)
+ prec = arg->stlen;
+ cp = arg->stptr;
+ goto pr_tail;
+ case 'd':
+ case 'i':
+ need_format = FALSE;
+ parse_next_arg();
+ tmpval = force_number(arg);
+ if (tmpval < 0) {
+ if (tmpval < LONG_MIN)
+ goto out_of_range;
+ sgn = TRUE;
+ uval = - (unsigned long) (long) tmpval;
+ } else {
+ /* Use !, so that NaNs are out of range.
+ The cast avoids a SunOS 4.1.x cc bug. */
+ if (! (tmpval <= (unsigned long) ULONG_MAX))
+ goto out_of_range;
+ sgn = FALSE;
+ uval = (unsigned long) tmpval;
+ }
+ do {
+ *--cp = (char) ('0' + uval % 10);
+ uval /= 10;
+ } while (uval > 0);
+ if (sgn)
+ *--cp = '-';
+ else if (signchar)
+ *--cp = signchar;
+ /*
+ * precision overrides '0' flags. however, for
+ * integer formats, precsion is minimum number of
+ * *digits*, not characters, thus we want to fill
+ * with zeroes.
+ */
+ if (have_prec)
+ fill = zero_string;
+ if (prec > fw)
+ fw = prec;
+ prec = cend - cp;
+ if (fw > prec && ! lj && fill != sp
+ && (*cp == '-' || signchar)) {
+ bchunk_one(cp);
+ cp++;
+ prec--;
+ fw--;
+ }
+ goto pr_tail;
+ case 'X':
+ chbuf = Uchbuf; /* FALL THROUGH */
+ case 'x':
+ base += 6; /* FALL THROUGH */
+ case 'u':
+ base += 2; /* FALL THROUGH */
+ case 'o':
+ base += 8;
+ need_format = FALSE;
+ parse_next_arg();
+ tmpval = force_number(arg);
+ if (tmpval < 0) {
+ if (tmpval < LONG_MIN)
+ goto out_of_range;
+ uval = (unsigned long) (long) tmpval;
+ } else {
+ /* Use !, so that NaNs are out of range.
+ The cast avoids a SunOS 4.1.x cc bug. */
+ if (! (tmpval <= (unsigned long) ULONG_MAX))
+ goto out_of_range;
+ uval = (unsigned long) tmpval;
+ }
+ /*
+ * precision overrides '0' flags. however, for
+ * integer formats, precsion is minimum number of
+ * *digits*, not characters, thus we want to fill
+ * with zeroes.
+ */
+ if (have_prec)
+ fill = zero_string;
+ do {
+ *--cp = chbuf[uval % base];
+ uval /= base;
+ } while (uval > 0);
+ if (alt) {
+ if (base == 16) {
+ *--cp = cs1;
+ *--cp = '0';
+ if (fill != sp) {
+ bchunk(cp, 2);
+ cp += 2;
+ fw -= 2;
+ }
+ } else if (base == 8)
+ *--cp = '0';
+ }
+ base = 0;
+ if (prec > fw)
+ fw = prec;
+ prec = cend - cp;
+ pr_tail:
+ if (! lj) {
+ while (fw > prec) {
+ bchunk_one(fill);
+ fw--;
+ }
+ }
+ bchunk(cp, (int) prec);
+ while (fw > prec) {
+ bchunk_one(fill);
+ fw--;
+ }
+ s0 = s1;
+ free_temp(arg);
+ break;
+
+ out_of_range:
+ /* out of range - emergency use of %g format */
+ cs1 = 'g';
+ goto format_float;
+
+ case 'g':
+ case 'G':
+ case 'e':
+ case 'f':
+ case 'E':
+ need_format = FALSE;
+ parse_next_arg();
+ tmpval = force_number(arg);
+ format_float:
+ free_temp(arg);
+ if (! have_prec)
+ prec = DEFAULT_G_PRECISION;
+ chksize(fw + prec + 9); /* 9 == slop */
+
+ cp = cpbuf;
+ *cp++ = '%';
+ if (lj)
+ *cp++ = '-';
+ if (signchar)
+ *cp++ = signchar;
+ if (alt)
+ *cp++ = '#';
+ if (fill != sp)
+ *cp++ = '0';
+ cp = strcpy(cp, "*.*") + 3;
+ *cp++ = cs1;
+ *cp = '\0';
+#ifndef GFMT_WORKAROUND
+ (void) sprintf(obufout, cpbuf,
+ (int) fw, (int) prec, (double) tmpval);
+#else /* GFMT_WORKAROUND */
+ if (cs1 == 'g' || cs1 == 'G')
+ sgfmt(obufout, cpbuf, (int) alt,
+ (int) fw, (int) prec, (double) tmpval);
+ else
+ (void) sprintf(obufout, cpbuf,
+ (int) fw, (int) prec, (double) tmpval);
+#endif /* GFMT_WORKAROUND */
+ len = strlen(obufout);
+ ofre -= len;
+ obufout += len;
+ s0 = s1;
+ break;
+ default:
+ break;
+ }
+ if (toofew)
+ fatal("%s\n\t`%s'\n\t%*s%s",
+ "not enough arguments to satisfy format string",
+ fmt_string, s1 - fmt_string - 2, "",
+ "^ ran out for this one"
+ );
+ }
+ if (do_lint) {
+ if (need_format)
+ warning(
+ "printf format specifier does not have control letter");
+ if (carg != NULL)
+ warning(
+ "too many arguments supplied for format string");
+ }
+ bchunk(s0, s1 - s0);
+ r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
+ r->flags |= TEMP;
+ return r;
+}
+
+/* do_sprintf --- build the formatted string for sprintf */
+
+NODE *
+do_sprintf(tree)
+NODE *tree;
+{
+	NODE *fmt;
+	NODE *formatted;
+
+	/* the first list element is the format, the rest are its arguments */
+	fmt = tree_eval(tree->lnode);
+	fmt = force_string(fmt);
+
+	formatted = format_tree(fmt->stptr, fmt->stlen, tree->rnode);
+	free_temp(fmt);
+	return formatted;
+}
+
+/* do_printf --- format the arguments and write them, honoring redirection */
+
+void
+do_printf(tree)
+register NODE *tree;
+{
+	struct redirect *rp = NULL;
+	FILE *fp = stdout;	/* destination unless redirected */
+	NODE *out;
+
+	if (tree->lnode == NULL) {
+		if (! do_traditional)
+			fatal("printf: no arguments");
+		else {
+			if (do_lint)
+				warning("printf: no arguments");
+			return;	/* bwk accepts it silently */
+		}
+	}
+
+	if (tree->rnode != NULL) {
+		int errflg;	/* not used, sigh */
+
+		rp = redirect(tree->rnode, &errflg);
+		if (rp == NULL)
+			return;
+		fp = rp->fp;
+		if (fp == NULL)
+			return;
+	}
+
+	out = do_sprintf(tree->lnode);
+	efwrite(out->stptr, sizeof(char), out->stlen, fp, "printf", rp, TRUE);
+	free_temp(out);
+}
+
+/* do_sqrt --- square root of the argument; warns when it is negative */
+
+NODE *
+do_sqrt(tree)
+NODE *tree;
+{
+	NODE *operand = tree_eval(tree->lnode);
+	double x = (double) force_number(operand);
+	double root;
+
+	free_temp(operand);
+	if (x < 0.0)
+		warning("sqrt called with negative argument %g", x);
+	root = sqrt(x);
+	return tmp_number((AWKNUM) root);
+}
+
+/*
+ * do_substr --- do the substr function
+ *
+ * The argument list holds the source string, the 1-based start index
+ * and an optional length. A start below 1 is treated as 1; a length
+ * that is <= 0, an empty source string, or a start past the end of
+ * the string all yield Nnull_string. A length reaching beyond the end
+ * of the string is clipped. Lint warnings describe each adjustment.
+ *
+ * All range checks are done on the double values before anything is
+ * converted to size_t, so the conversions are always in range and the
+ * unsigned arithmetic cannot wrap around.
+ */
+
+NODE *
+do_substr(tree)
+NODE *tree;
+{
+	NODE *t1, *t2, *t3;
+	NODE *r;
+	register size_t indx;
+	size_t length;
+	size_t avail;		/* characters from indx to end of string */
+	double d_index, d_length;
+	int have_length;
+
+	t1 = force_string(tree_eval(tree->lnode));
+	t2 = tree_eval(tree->rnode->lnode);
+	d_index = force_number(t2);
+	free_temp(t2);
+
+	if (d_index < 1.0) {
+		if (do_lint)
+			warning("substr: start index %g invalid, using 1",
+				d_index);
+		d_index = 1;
+	}
+	if (do_lint && double_to_int(d_index) != d_index)
+		warning("substr: non-integer start index %g will be truncated",
+			d_index);
+
+	d_length = 0.0;
+	have_length = (tree->rnode->rnode != NULL);
+	if (have_length) {
+		t3 = tree_eval(tree->rnode->rnode->lnode);
+		d_length = force_number(t3);
+		free_temp(t3);
+		if (d_length <= 0.0) {
+			if (do_lint)
+				warning("substr: length %g is <= 0", d_length);
+			free_temp(t1);
+			return Nnull_string;
+		}
+		if (do_lint && double_to_int(d_length) != d_length)
+			warning(
+		"substr: non-integer length %g will be truncated",
+				d_length);
+	}
+
+	if (t1->stlen == 0) {
+		if (do_lint)
+			warning("substr: source string is zero length");
+		free_temp(t1);
+		return Nnull_string;
+	}
+
+	/*
+	 * awk indices are from 1, C's are from 0. Written with `!' so
+	 * that a NaN start index is also treated as past the end.
+	 */
+	if (! (d_index - 1.0 < (double) t1->stlen)) {
+		if (do_lint)
+			warning("substr: start index %g is past end of string",
+				d_index);
+		free_temp(t1);
+		return Nnull_string;
+	}
+	indx = (size_t) (d_index - 1.0);	/* in range: < t1->stlen */
+	avail = t1->stlen - indx;		/* cannot underflow */
+
+	/*
+	 * The truncated length fits iff d_length < avail + 1; otherwise
+	 * (or with no third argument) use the remainder of the string.
+	 */
+	if (have_length && d_length < (double) avail + 1.0)
+		length = (size_t) d_length;
+	else {
+		if (have_length && do_lint)
+			warning(
+	"substr: length %g at position %lu exceeds length of first argument (%lu)",
+				d_length, (unsigned long) (indx + 1),
+				(unsigned long) t1->stlen);
+		length = avail;
+	}
+
+	r = tmp_string(t1->stptr + indx, length);
+	free_temp(t1);
+	return r;
+}
+
+/*
+ * do_strftime --- format a time stamp
+ *
+ * With no arguments the current time is formatted with the traditional
+ * date format. The first argument, if present, replaces the format and
+ * the second replaces the time value. An empty format yields an empty
+ * string, and so does a time value that localtime() cannot convert.
+ */
+
+NODE *
+do_strftime(tree)
+NODE *tree;
+{
+	NODE *t1, *t2, *ret;
+	struct tm *tm;
+	time_t fclock;
+	char *bufp;
+	size_t buflen, bufsize;
+	char buf[BUFSIZ];
+	static char def_format[] = "%a %b %d %H:%M:%S %Z %Y";
+	char *format;
+	int formatlen;
+
+	/* set defaults first */
+	format = def_format;	/* traditional date format */
+	formatlen = strlen(format);
+	(void) time(&fclock);	/* current time of day */
+
+	t1 = t2 = NULL;
+	if (tree != NULL) {	/* have args */
+		if (tree->lnode != NULL) {
+			t1 = force_string(tree_eval(tree->lnode));
+			format = t1->stptr;
+			formatlen = t1->stlen;
+			if (formatlen == 0) {
+				if (do_lint)
+					warning("strftime called with empty format string");
+				free_temp(t1);
+				return tmp_string("", 0);
+			}
+		}
+
+		if (tree->rnode != NULL) {
+			t2 = tree_eval(tree->rnode->lnode);
+			fclock = (time_t) force_number(t2);
+			free_temp(t2);
+		}
+	}
+
+	/* localtime() may fail; strftime() must not be given a null tm */
+	tm = localtime(&fclock);
+	if (tm == NULL) {
+		if (do_lint)
+			warning("strftime: time value cannot be converted to local time");
+		if (t1)
+			free_temp(t1);
+		return tmp_string("", 0);
+	}
+
+	bufp = buf;
+	bufsize = sizeof(buf);
+	for (;;) {
+		*bufp = '\0';
+		buflen = strftime(bufp, bufsize, format, tm);
+		/*
+		 * buflen can be zero EITHER because there's not enough
+		 * room in the string, or because the control command
+		 * goes to the empty string. Make a reasonable guess that
+		 * if the buffer is 1024 times bigger than the length of the
+		 * format string, it's not failing for lack of room.
+		 * Thanks to Paul Eggert for pointing out this issue.
+		 */
+		if (buflen > 0 || bufsize >= 1024 * formatlen)
+			break;
+		bufsize *= 2;
+		if (bufp == buf)	/* first growth: move off the stack buffer */
+			emalloc(bufp, char *, bufsize, "do_strftime");
+		else
+			erealloc(bufp, char *, bufsize, "do_strftime");
+	}
+	ret = tmp_string(bufp, buflen);
+	if (bufp != buf)
+		free(bufp);
+	if (t1)
+		free_temp(t1);
+	return ret;
+}
+
+/* do_systime --- return the current time of day as a number */
+
+NODE *
+do_systime(tree)
+NODE *tree;
+{
+	time_t now = time((time_t *) 0);
+
+	return tmp_number((AWKNUM) now);
+}
+
+
+
+/* do_system --- hand a command string to system() and return its status */
+
+NODE *
+do_system(tree)
+NODE *tree;
+{
+	NODE *tmp;
+	char *cmd;
+	char *endp;
+	char saved;
+	int status = 0;
+
+	(void) flush_io();	/* so output is synchronous with gawk's */
+	tmp = tree_eval(tree->lnode);
+	cmd = force_string(tmp)->stptr;
+
+	if (cmd != NULL && *cmd != '\0') {
+		/*
+		 * Ensure the argument to system() is zero-terminated.
+		 *
+		 * The byte just past the string is saved and put back
+		 * afterwards, on the advice of David Trueman
+		 * <david@cs.dal.ca> (3 Nov 1993): it would normally be
+		 * the field separator, or with FIELDWIDTHS possibly the
+		 * first character of the next field, and nobody has
+		 * checked exhaustively that clobbering it is harmless.
+		 */
+		endp = cmd + tmp->stlen;
+		saved = *endp;
+		*endp = '\0';
+
+		status = (system(cmd) >> 8) & 0xff;
+
+		*endp = saved;
+	}
+	free_temp(tmp);
+	return tmp_number((AWKNUM) status);
+}
+
+extern NODE **fmt_list; /* declared in eval.c */
+
+/* do_print --- write the items joined by OFS and finished with ORS */
+
+void
+do_print(tree)
+register NODE *tree;
+{
+	struct redirect *rp = NULL;
+	FILE *fp = stdout;	/* destination unless redirected */
+	NODE **vals;
+	NODE *item;
+	NODE *v;
+	int count = 0;
+	int k;
+
+	if (tree->rnode) {
+		int errflg;	/* not used, sigh */
+
+		rp = redirect(tree->rnode, &errflg);
+		if (rp == NULL)
+			return;
+		fp = rp->fp;
+		if (fp == NULL)
+			return;
+	}
+
+	/*
+	 * General idea is to evaluate all the expressions first and
+	 * then print them, otherwise you get surprising behavior.
+	 * See test/prtoeval.awk for an example program.
+	 */
+	for (item = tree->lnode; item != NULL; item = item->rnode)
+		count++;
+	emalloc(vals, NODE **, count * sizeof(NODE *), "do_print");
+
+	k = 0;
+	item = tree->lnode;
+	while (item != NULL) {
+		/* Here lies the wumpus. R.I.P. */
+		v = tree_eval(item->lnode);
+		vals[k] = dupnode(v);
+		free_temp(v);
+
+		/* numbers get their string form now, via CONVFMT or OFMT */
+		if ((vals[k]->flags & NUMBER) != 0) {
+			if (OFMTidx == CONVFMTidx)
+				(void) force_string(vals[k]);
+			else
+				vals[k] = format_val(OFMT, OFMTidx, vals[k]);
+		}
+		k++;
+		item = item->rnode;
+	}
+
+	for (k = 0; k < count; k++) {
+		efwrite(vals[k]->stptr, sizeof(char), vals[k]->stlen, fp, "print", rp, FALSE);
+		unref(vals[k]);
+		/* separator between items, but not after the last one */
+		if (k + 1 < count && OFSlen > 0)
+			efwrite(OFS, sizeof(char), (size_t) OFSlen,
+				fp, "print", rp, FALSE);
+	}
+	if (ORSlen > 0)
+		efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE);
+	free(vals);
+}
+
+/* do_tolower --- return a copy of the string with upper case made lower */
+
+NODE *
+do_tolower(tree)
+NODE *tree;
+{
+	NODE *src, *copy;
+	register unsigned char *p, *limit;
+
+	src = force_string(tree_eval(tree->lnode));
+	copy = tmp_string(src->stptr, src->stlen);
+
+	/* convert the copy in place, one byte at a time */
+	p = (unsigned char *) copy->stptr;
+	limit = (unsigned char *) (copy->stptr + copy->stlen);
+	while (p < limit) {
+		if (ISUPPER(*p))
+			*p = tolower(*p);
+		p++;
+	}
+	free_temp(src);
+	return copy;
+}
+
+/* do_toupper --- return a copy of the string with lower case made upper */
+
+NODE *
+do_toupper(tree)
+NODE *tree;
+{
+	NODE *src, *copy;
+	register unsigned char *p, *limit;
+
+	src = force_string(tree_eval(tree->lnode));
+	copy = tmp_string(src->stptr, src->stlen);
+
+	/* convert the copy in place, one byte at a time */
+	p = (unsigned char *) copy->stptr;
+	limit = (unsigned char *) (copy->stptr + copy->stlen);
+	while (p < limit) {
+		if (ISLOWER(*p))
+			*p = toupper(*p);
+		p++;
+	}
+	free_temp(src);
+	return copy;
+}
+
+/* do_atan2 --- atan2() of the first argument over the second */
+
+NODE *
+do_atan2(tree)
+NODE *tree;
+{
+	NODE *ynode, *xnode;
+	double y, x;
+	double angle;
+
+	/* evaluate both operands before converting either one */
+	ynode = tree_eval(tree->lnode);
+	xnode = tree_eval(tree->rnode->lnode);
+	y = force_number(ynode);
+	x = force_number(xnode);
+	free_temp(ynode);
+	free_temp(xnode);
+
+	angle = atan2(y, x);
+	return tmp_number((AWKNUM) angle);
+}
+
+/* do_sin --- sine of the argument */
+
+NODE *
+do_sin(tree)
+NODE *tree;
+{
+	NODE *operand = tree_eval(tree->lnode);
+	double angle = (double) force_number(operand);
+	double result = sin(angle);
+
+	free_temp(operand);
+	return tmp_number((AWKNUM) result);
+}
+
+/* do_cos --- cosine of the argument */
+
+NODE *
+do_cos(tree)
+NODE *tree;
+{
+	NODE *operand = tree_eval(tree->lnode);
+	double angle = (double) force_number(operand);
+	double result = cos(angle);
+
+	free_temp(operand);
+	return tmp_number((AWKNUM) result);
+}
+
+/* do_rand --- return random() scaled by GAWK_RANDOM_MAX */
+
+static int firstrand = TRUE;	/* generator state not yet installed */
+static char state[512];	/* state array given to initstate() */
+
+/* ARGSUSED */
+NODE *
+do_rand(tree)
+NODE *tree;
+{
+	AWKNUM draw;
+
+	if (firstrand) {
+		/* first use: install our state array and seed with 1 */
+		(void) initstate((unsigned) 1, state, sizeof state);
+		srandom(1);
+		firstrand = FALSE;
+	}
+
+	draw = (AWKNUM) random();
+	return tmp_number(draw / GAWK_RANDOM_MAX);
+}
+
+/* do_srand --- reseed the generator and return the previous seed */
+
+NODE *
+do_srand(tree)
+NODE *tree;
+{
+	static long save_seed = 1;
+	long previous = save_seed;	/* SVR4 awk srand returns previous seed */
+	NODE *arg;
+
+	if (! firstrand)
+		(void) setstate(state);
+	else {
+		(void) initstate((unsigned) 1, state, sizeof state);
+		/* don't need to srandom(1), we're changing the seed below */
+		firstrand = FALSE;
+	}
+
+	if (tree != NULL) {
+		/* seed supplied by the caller */
+		arg = tree_eval(tree->lnode);
+		save_seed = (long) force_number(arg);
+		srandom((unsigned int) save_seed);
+		free_temp(arg);
+	} else {
+		/* no argument: seed from the time of day */
+		save_seed = (long) time((time_t *) 0);
+		srandom((unsigned int) save_seed);
+	}
+	return tmp_number((AWKNUM) previous);
+}
+
+/* do_match --- search for a regexp; sets RSTART and RLENGTH, returns RSTART */
+
+NODE *
+do_match(tree)
+NODE *tree;
+{
+	NODE *subject;
+	Regexp *rp;
+	int rstart;
+	AWKNUM rlength;
+
+	subject = force_string(tree_eval(tree->lnode));
+	rp = re_update(tree->rnode->lnode);
+	rstart = research(rp, subject->stptr, 0, subject->stlen, TRUE);
+
+	if (rstart < 0) {	/* match failed */
+		rstart = 0;
+		rlength = -1.0;
+	} else {		/* match succeeded */
+		rlength = REEND(rp, subject->stptr) - RESTART(rp, subject->stptr);
+		rstart++;	/* 1-based indexing */
+	}
+	free_temp(subject);
+
+	/* publish the result through the RSTART and RLENGTH variables */
+	unref(RSTART_node->var_value);
+	RSTART_node->var_value = make_number((AWKNUM) rstart);
+	unref(RLENGTH_node->var_value);
+	RLENGTH_node->var_value = make_number(rlength);
+	return tmp_number((AWKNUM) rstart);
+}
+
+/* sub_common --- the common code (does the work) for sub, gsub, and gensub */
+
+/*
+ * Gsub can be tricksy; particularly when handling the case of null strings.
+ * The following awk code was useful in debugging problems. It is too bad
+ * that it does not readily translate directly into the C code, below.
+ *
+ * #! /usr/local/bin/mawk -f
+ *
+ * BEGIN {
+ * TRUE = 1; FALSE = 0
+ * print "--->", mygsub("abc", "b+", "FOO")
+ * print "--->", mygsub("abc", "x*", "X")
+ * print "--->", mygsub("abc", "b*", "X")
+ * print "--->", mygsub("abc", "c", "X")
+ * print "--->", mygsub("abc", "c+", "X")
+ * print "--->", mygsub("abc", "x*$", "X")
+ * }
+ *
+ * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag)
+ * {
+ * origstr = str;
+ * eosflag = nonzeroflag = FALSE
+ * while (match(str, regex)) {
+ * if (RLENGTH > 0) { # easy case
+ * nonzeroflag = TRUE
+ * if (RSTART == 1) { # match at front of string
+ * newstr = newstr replace
+ * } else {
+ * newstr = newstr substr(str, 1, RSTART-1) replace
+ * }
+ * str = substr(str, RSTART+RLENGTH)
+ * } else if (nonzeroflag) {
+ * # last match was non-zero in length, and at the
+ * # current character, we get a zero length match,
+ * # which we don't really want, so skip over it
+ * newstr = newstr substr(str, 1, 1)
+ * str = substr(str, 2)
+ * nonzeroflag = FALSE
+ * } else {
+ * # 0-length match
+ * if (RSTART == 1) {
+ * newstr = newstr replace substr(str, 1, 1)
+ * str = substr(str, 2)
+ * } else {
+ * return newstr str replace
+ * }
+ * }
+ * if (length(str) == 0)
+ * if (eosflag)
+ * break;
+ * else
+ * eosflag = TRUE
+ * }
+ * if (length(str) > 0)
+ * newstr = newstr str # rest of string
+ *
+ * return newstr
+ * }
+ */
+
+/*
+ * NB: `howmany' conflicts with a SunOS macro in <sys/param.h>.
+ *
+ * tree      argument list: the regexp, then the replacement text,
+ *           then the target in which the substitution is made
+ * how_many  -1 replaces every match; otherwise only the match with
+ *           that ordinal number is replaced
+ * backdigs  when true, a backslash followed by a digit in the
+ *           replacement copies in the corresponding subpattern;
+ *           otherwise the backslash-ampersand escapes are interpreted
+ *
+ * Returns a temporary number node: 0 when the regexp does not match
+ * at all, otherwise the number of matches that were visited.
+ */
+
+static NODE *
+sub_common(tree, how_many, backdigs)
+NODE *tree;
+int how_many, backdigs;
+{
+ register char *scan;
+ register char *bp, *cp;
+ char *buf; /* result is assembled here */
+ size_t buflen;
+ register char *matchend;
+ register size_t len;
+ char *matchstart;
+ char *text; /* start of the not yet copied part of the target */
+ size_t textlen;
+ char *repl;
+ char *replend;
+ size_t repllen;
+ int sofar;
+ int ampersands;
+ int matches = 0;
+ Regexp *rp;
+ NODE *s; /* subst. pattern */
+ NODE *t; /* string to make sub. in; $0 if none given */
+ NODE *tmp;
+ NODE **lhs = &tree; /* value not used -- just different from NULL */
+ int priv = FALSE;
+ Func_ptr after_assign = NULL;
+
+ int global = (how_many == -1);
+ long current;
+ int lastmatchnonzero;
+
+ tmp = tree->lnode;
+ rp = re_update(tmp);
+
+ tree = tree->rnode;
+ s = tree->lnode;
+
+ tree = tree->rnode;
+ tmp = tree->lnode;
+ t = force_string(tree_eval(tmp));
+
+ /* do the search early to avoid work on non-match */
+ if (research(rp, t->stptr, 0, t->stlen, TRUE) == -1 ||
+ RESTART(rp, t->stptr) > t->stlen) {
+ free_temp(t);
+ return tmp_number((AWKNUM) 0.0);
+ }
+
+ if (tmp->type == Node_val) /* a plain value has nothing to assign back to */
+ lhs = NULL;
+ else
+ lhs = get_lhs(tmp, &after_assign);
+ t->flags |= STRING;
+ /*
+ * create a private copy of the string
+ */
+ if (t->stref > 1 || (t->flags & (PERM|FIELD)) != 0) {
+ unsigned int saveflags;
+
+ saveflags = t->flags;
+ t->flags &= ~MALLOC;
+ tmp = dupnode(t);
+ t->flags = saveflags;
+ t = tmp;
+ priv = TRUE;
+ }
+ text = t->stptr;
+ textlen = t->stlen;
+ buflen = textlen + 2;
+
+ s = force_string(tree_eval(s));
+ repl = s->stptr;
+ replend = repl + s->stlen;
+ repllen = replend - repl;
+ emalloc(buf, char *, buflen + 2, "sub_common");
+ buf[buflen] = '\0';
+ buf[buflen + 1] = '\0';
+ ampersands = 0;
+ /*
+ * first pass over the replacement: work out how many literal
+ * bytes it contributes (repllen) and how many times matched
+ * text is copied in (ampersands)
+ */
+ for (scan = repl; scan < replend; scan++) {
+ if (*scan == '&') {
+ repllen--;
+ ampersands++;
+ } else if (*scan == '\\') {
+ if (backdigs) { /* gensub, behave sanely */
+ if (ISDIGIT(scan[1])) {
+ ampersands++;
+ scan++;
+ } else { /* \q for any q --> q */
+ repllen--;
+ scan++;
+ }
+ } else { /* (proposed) posix '96 mode */
+ if (strncmp(scan, "\\\\\\&", 4) == 0) {
+ /* \\\& --> \& */
+ repllen -= 2;
+ scan += 3;
+ } else if (strncmp(scan, "\\\\&", 3) == 0) {
+ /* \\& --> \<string> */
+ ampersands++;
+ repllen--;
+ scan += 2;
+ } else if (scan[1] == '&') {
+ /* \& --> & */
+ repllen--;
+ scan++;
+ } /* else
+ leave alone, it goes into the output */
+ }
+ }
+ }
+
+ lastmatchnonzero = FALSE;
+ bp = buf;
+ for (current = 1;; current++) { /* one iteration per match found */
+ matches++;
+ matchstart = t->stptr + RESTART(rp, t->stptr);
+ matchend = t->stptr + REEND(rp, t->stptr);
+
+ /*
+ * create the result, copying in parts of the original
+ * string
+ */
+ len = matchstart - text + repllen
+ + ampersands * (matchend - matchstart);
+ sofar = bp - buf;
+ while (buflen < (sofar + len + 1)) {
+ buflen *= 2;
+ erealloc(buf, char *, buflen, "sub_common");
+ bp = buf + sofar;
+ }
+ for (scan = text; scan < matchstart; scan++)
+ *bp++ = *scan;
+ if (global || current == how_many) {
+ /*
+ * If the current match matched the null string,
+ * and the last match didn't and did a replacement,
+ * then skip this one.
+ */
+ if (lastmatchnonzero && matchstart == matchend) {
+ lastmatchnonzero = FALSE;
+ goto empty;
+ }
+ /*
+ * If replacing all occurrences, or this is the
+ * match we want, copy in the replacement text,
+ * making substitutions as we go.
+ */
+ for (scan = repl; scan < replend; scan++)
+ if (*scan == '&')
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ else if (*scan == '\\') {
+ if (backdigs) { /* gensub, behave sanely */
+ if (ISDIGIT(scan[1])) {
+ int dig = scan[1] - '0';
+ char *start, *end;
+
+ start = t->stptr
+ + SUBPATSTART(rp, t->stptr, dig);
+ end = t->stptr
+ + SUBPATEND(rp, t->stptr, dig);
+
+ for (cp = start; cp < end; cp++)
+ *bp++ = *cp;
+ scan++;
+ } else /* \q for any q --> q */
+ *bp++ = *++scan;
+ } else { /* posix '96 mode, bleah */
+ if (strncmp(scan, "\\\\\\&", 4) == 0) {
+ /* \\\& --> \& */
+ *bp++ = '\\';
+ *bp++ = '&';
+ scan += 3;
+ } else if (strncmp(scan, "\\\\&", 3) == 0) {
+ /* \\& --> \<string> */
+ *bp++ = '\\';
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ scan += 2;
+ } else if (scan[1] == '&') {
+ /* \& --> & */
+ *bp++ = '&';
+ scan++;
+ } else
+ *bp++ = *scan;
+ }
+ } else
+ *bp++ = *scan;
+ if (matchstart != matchend)
+ lastmatchnonzero = TRUE;
+ } else {
+ /*
+ * don't want this match, skip over it by copying
+ * in current text.
+ */
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ }
+ empty:
+ /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
+ if (matchstart == matchend && matchend < text + textlen) {
+ *bp++ = *matchend;
+ matchend++;
+ }
+ textlen = text + textlen - matchend; /* what remains after this match */
+ text = matchend;
+
+ if ((current >= how_many && !global)
+ || ((long) textlen <= 0 && matchstart == matchend)
+ || research(rp, t->stptr, text - t->stptr, textlen, TRUE) == -1)
+ break;
+
+ }
+ sofar = bp - buf;
+ if (buflen - sofar - textlen - 1) { /* resize to fit the remaining tail */
+ buflen = sofar + textlen + 2;
+ erealloc(buf, char *, buflen, "sub_common");
+ bp = buf + sofar;
+ }
+ for (scan = matchend; scan < text + textlen; scan++)
+ *bp++ = *scan;
+ *bp = '\0';
+ textlen = bp - buf;
+ free(t->stptr); /* the node takes over the new buffer */
+ t->stptr = buf;
+ t->stlen = textlen;
+
+ free_temp(s);
+ if (matches > 0 && lhs) {
+ if (priv) { /* store the private copy in place of the old value */
+ unref(*lhs);
+ *lhs = t;
+ }
+ if (after_assign != NULL)
+ (*after_assign)();
+ t->flags &= ~(NUM|NUMBER);
+ }
+ return tmp_number((AWKNUM) matches);
+}
+
+/* do_gsub --- substitute for every match (how_many of -1) */
+
+NODE *
+do_gsub(tree)
+NODE *tree;
+{
+	NODE *count;
+
+	count = sub_common(tree, -1, FALSE);
+	return count;
+}
+
+/* do_sub --- substitute for the first match only */
+
+NODE *
+do_sub(tree)
+NODE *tree;
+{
+	NODE *count;
+
+	count = sub_common(tree, 1, FALSE);
+	return count;
+}
+
+/* do_gensub --- reshape the argument list for sub_common and return the new string */
+
+NODE *
+do_gensub(tree)
+NODE *tree;
+{
+	NODE head, second, third;	/* private copies of the list cells */
+	NODE *flag, *orig, *target, *count;
+	long how_many;
+
+	/*
+	 * gensub carries an extra "which match" argument between the
+	 * replacement and the target. Copy the list cells, pull that
+	 * argument out, and relink the copies into the three element
+	 * list that sub_common() expects. It helps to draw a picture
+	 * of this ...
+	 */
+	head = *tree;
+	second = *(tree->rnode);
+	head.rnode = & second;
+
+	flag = tree_eval(second.rnode->lnode);	/* value of global flag */
+	orig = force_string(tree_eval(second.rnode->rnode->lnode));	/* target */
+
+	/*
+	 * The substitution is made in a copy of the target string; that
+	 * copy is what this function returns.
+	 */
+	target = make_string(orig->stptr, orig->stlen);
+	free_temp(orig);
+
+	third = *(second.rnode->rnode);
+	third.lnode = target;
+	second.rnode = & third;
+
+	/* default is one substitution */
+	how_many = 1;
+	if ((flag->flags & (STR|STRING)) == 0) {
+		double d = force_number(flag);
+
+		if (d > 0)
+			how_many = d;
+	} else if (flag->stlen > 0
+		   && (flag->stptr[0] == 'g' || flag->stptr[0] == 'G'))
+		how_many = -1;
+
+	free_temp(flag);
+
+	/*
+	 * The count that sub_common() returns is of no interest: the
+	 * string is returned even if no substitutions were done.
+	 */
+	count = sub_common(&head, how_many, TRUE);
+	free_temp(count);
+
+	target->flags |= TEMP;
+	return target;
+}
+
+#ifdef GFMT_WORKAROUND
+/*
+ * printf's %g format [can't rely on gcvt()]
+ * caveat: don't use as argument to *printf()!
+ * 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
+ *
+ * The value is first formatted with 'e' to learn its exponent. If the
+ * exponent is in the range where %g uses fixed notation, it is
+ * formatted again with 'f'. Unless 'alt' is set, the precision is then
+ * reduced by the number of trailing zeros and the value is formatted
+ * one final time. The result is left in 'buf'; nothing is returned.
+ */
+static void
+sgfmt(buf, format, alt, fwidth, prec, g)
+char *buf; /* return buffer; assumed big enough to hold result */
+const char *format;
+int alt; /* use alternate form flag */
+int fwidth; /* field width in a format */
+int prec; /* indicates desired significant digits, not decimal places */
+double g; /* value to format */
+{
+ char dform[40]; /* private, modifiable copy of 'format' */
+ register char *gpos;
+ register char *d, *e, *p;
+ int again = FALSE;
+
+ strncpy(dform, format, sizeof dform - 1);
+ dform[sizeof dform - 1] = '\0';
+ gpos = strrchr(dform, '.'); /* not checked for NULL, hence the warning above */
+
+ if (g == 0.0 && ! alt) { /* easy special case */
+ *gpos++ = 'd'; /* overwrite ".*g" so the format ends in "*d" */
+ *gpos = '\0';
+ (void) sprintf(buf, dform, fwidth, 0);
+ return;
+ }
+
+ /* advance to location of 'g' in the format */
+ while (*gpos && *gpos != 'g' && *gpos != 'G')
+ gpos++;
+
+ if (prec <= 0) /* negative precision is ignored */
+ prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
+
+ if (*gpos == 'G')
+ again = TRUE;
+ /* start with 'e' format (it'll provide nice exponent) */
+ *gpos = 'e';
+ prec--; /* 'e' counts digits after the point, one less than the total */
+ (void) sprintf(buf, dform, fwidth, prec, g);
+ if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
+ int expn = atoi(e+1); /* fetch exponent */
+ if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */
+ /* switch to 'f' format and re-do */
+ *gpos = 'f';
+ prec -= expn; /* decimal precision */
+ (void) sprintf(buf, dform, fwidth, prec, g);
+ e = buf + strlen(buf);
+ while (*--e == ' ') /* back up over trailing blanks */
+ continue;
+ e++;
+ }
+ else if (again)
+ *gpos = 'E';
+
+ /* if 'alt' in force, then trailing zeros are not removed */
+ if (! alt && (d = strrchr(buf, '.')) != NULL) {
+ /* throw away an excess of precision */
+ for (p = e; p > d && *--p == '0'; )
+ prec--;
+ if (d == p)
+ prec--;
+ if (prec < 0)
+ prec = 0;
+ /* and do that once again */
+ again = TRUE;
+ }
+ if (again)
+ (void) sprintf(buf, dform, fwidth, prec, g);
+ }
+}
+#endif /* GFMT_WORKAROUND */
+
+#ifdef BITOPS
+#define BITS_PER_BYTE 8 /* if not true, you lose. too bad. */
+
+/*
+ * do_lshift --- perform a << operation
+ *
+ * Both arguments are converted to unsigned long before shifting.
+ * With lint checking on, negative, fractional and oversized operands
+ * are reported. A shift count equal to the width of unsigned long is
+ * already out of range for the C shift operators, so it is reported
+ * too, not only counts above the width.
+ */
+
+NODE *
+do_lshift(tree)
+NODE *tree;
+{
+	NODE *s1, *s2;
+	unsigned long uval, ushift, result;
+	AWKNUM val, shift;
+
+	s1 = tree_eval(tree->lnode);
+	s2 = tree_eval(tree->rnode->lnode);
+	val = force_number(s1);
+	shift = force_number(s2);
+	free_temp(s1);
+	free_temp(s2);
+
+	if (do_lint) {
+		if (val < 0 || shift < 0)
+			warning("lshift(%lf, %lf): negative values will give strange results", val, shift);
+		if (double_to_int(val) != val || double_to_int(shift) != shift)
+			warning("lshift(%lf, %lf): fractional values will be truncated", val, shift);
+		if (shift >= (sizeof(unsigned long) * BITS_PER_BYTE))
+			warning("lshift(%lf, %lf): too large shift value will give strange results", val, shift);
+	}
+
+	uval = (unsigned long) val;
+	ushift = (unsigned long) shift;
+
+	result = uval << ushift;
+	return tmp_number((AWKNUM) result);
+}
+
+/*
+ * do_rshift --- perform a >> operation
+ *
+ * Both arguments are converted to unsigned long before shifting.
+ * With lint checking on, negative, fractional and oversized operands
+ * are reported. A shift count equal to the width of unsigned long is
+ * already out of range for the C shift operators, so it is reported
+ * too, not only counts above the width.
+ */
+
+NODE *
+do_rshift(tree)
+NODE *tree;
+{
+	NODE *s1, *s2;
+	unsigned long uval, ushift, result;
+	AWKNUM val, shift;
+
+	s1 = tree_eval(tree->lnode);
+	s2 = tree_eval(tree->rnode->lnode);
+	val = force_number(s1);
+	shift = force_number(s2);
+	free_temp(s1);
+	free_temp(s2);
+
+	if (do_lint) {
+		if (val < 0 || shift < 0)
+			warning("rshift(%lf, %lf): negative values will give strange results", val, shift);
+		if (double_to_int(val) != val || double_to_int(shift) != shift)
+			warning("rshift(%lf, %lf): fractional values will be truncated", val, shift);
+		if (shift >= (sizeof(unsigned long) * BITS_PER_BYTE))
+			warning("rshift(%lf, %lf): too large shift value will give strange results", val, shift);
+	}
+
+	uval = (unsigned long) val;
+	ushift = (unsigned long) shift;
+
+	result = uval >> ushift;
+	return tmp_number((AWKNUM) result);
+}
+
+/*
+ * do_and --- perform an & operation
+ *
+ * Evaluates tree->lnode and tree->rnode->lnode, forces both to numbers,
+ * converts them to unsigned long and returns their bitwise AND through
+ * tmp_number().  Lint mode warns about negative and fractional operands.
+ */
+
+NODE *
+do_and(tree)
+NODE *tree;
+{
+    NODE *lhs, *rhs;
+    AWKNUM left, right;
+    unsigned long bits;
+
+    lhs = tree_eval(tree->lnode);
+    rhs = tree_eval(tree->rnode->lnode);
+    left = force_number(lhs);
+    right = force_number(rhs);
+    free_temp(lhs);
+    free_temp(rhs);
+
+    if (do_lint && (left < 0 || right < 0))
+        warning("and(%lf, %lf): negative values will give strange results", left, right);
+    if (do_lint && (double_to_int(left) != left || double_to_int(right) != right))
+        warning("and(%lf, %lf): fractional values will be truncated", left, right);
+
+    bits = (unsigned long) left & (unsigned long) right;
+    return tmp_number((AWKNUM) bits);
+}
+
+/*
+ * do_or --- perform an | operation
+ *
+ * Evaluates tree->lnode and tree->rnode->lnode, forces both to numbers,
+ * converts them to unsigned long and returns their bitwise OR through
+ * tmp_number().  Lint mode warns about negative and fractional operands.
+ */
+
+NODE *
+do_or(tree)
+NODE *tree;
+{
+    NODE *lhs, *rhs;
+    AWKNUM left, right;
+    unsigned long bits;
+
+    lhs = tree_eval(tree->lnode);
+    rhs = tree_eval(tree->rnode->lnode);
+    left = force_number(lhs);
+    right = force_number(rhs);
+    free_temp(lhs);
+    free_temp(rhs);
+
+    if (do_lint && (left < 0 || right < 0))
+        warning("or(%lf, %lf): negative values will give strange results", left, right);
+    if (do_lint && (double_to_int(left) != left || double_to_int(right) != right))
+        warning("or(%lf, %lf): fractional values will be truncated", left, right);
+
+    bits = (unsigned long) left | (unsigned long) right;
+    return tmp_number((AWKNUM) bits);
+}
+
+/*
+ * do_xor --- perform an ^ operation
+ *
+ * Evaluates tree->lnode and tree->rnode->lnode, forces both to numbers,
+ * converts them to unsigned long and returns their bitwise exclusive OR
+ * through tmp_number().  Lint mode warns about negative and fractional
+ * operands.
+ */
+
+NODE *
+do_xor(tree)
+NODE *tree;
+{
+    NODE *lhs, *rhs;
+    AWKNUM left, right;
+    unsigned long bits;
+
+    lhs = tree_eval(tree->lnode);
+    rhs = tree_eval(tree->rnode->lnode);
+    left = force_number(lhs);
+    right = force_number(rhs);
+    free_temp(lhs);
+    free_temp(rhs);
+
+    if (do_lint && (left < 0 || right < 0))
+        warning("xor(%lf, %lf): negative values will give strange results", left, right);
+    if (do_lint && (double_to_int(left) != left || double_to_int(right) != right))
+        warning("xor(%lf, %lf): fractional values will be truncated", left, right);
+
+    bits = (unsigned long) left ^ (unsigned long) right;
+    return tmp_number((AWKNUM) bits);
+}
+
+/*
+ * do_compl --- perform a ~ operation
+ *
+ * Evaluates tree->lnode, forces it to a number, converts it to
+ * unsigned long and returns the bitwise complement through tmp_number().
+ * Lint mode warns about negative and fractional operands.
+ */
+
+NODE *
+do_compl(tree)
+NODE *tree;
+{
+    NODE *tmp;
+    double d;
+    unsigned long uval;
+
+    tmp = tree_eval(tree->lnode);
+    d = force_number(tmp);
+    free_temp(tmp);
+
+    if (do_lint) {
+        /*
+         * The sign has to be tested on d: uval is unsigned (so it can
+         * never be negative) and has not been assigned yet at this point.
+         */
+        if (d < 0)
+            warning("compl(%lf): negative value will give strange results", d);
+        if (double_to_int(d) != d)
+            warning("compl(%lf): fractional value will be truncated", d);
+    }
+
+    uval = (unsigned long) d;
+    uval = ~ uval;
+    return tmp_number((AWKNUM) uval);
+}
+
+/*
+ * do_strtonum --- the strtonum function
+ *
+ * Evaluates tree->lnode.  A value that is not already flagged NUM or
+ * NUMBER and whose text isnondecimal() accepts is converted with
+ * nondec2awknum(); everything else goes through force_number().  The
+ * result is returned through tmp_number().
+ */
+
+NODE *
+do_strtonum(tree)
+NODE *tree;
+{
+    NODE *tmp;
+    double d;
+
+    tmp = tree_eval(tree->lnode);
+
+    /* only string-valued operands are inspected for an octal/hex prefix */
+    if ((tmp->flags & (NUM|NUMBER)) == 0 && isnondecimal(tmp->stptr))
+        d = nondec2awknum(tmp->stptr, tmp->stlen);
+    else
+        d = (double) force_number(tmp);
+
+    free_temp(tmp);
+    return tmp_number((AWKNUM) d);
+}
+#endif /* BITOPS */
+
+#if defined(BITOPS) || defined(NONDECDATA)
+/* nondec2awknum --- convert octal or hex value to double */
+
+/*
+ * Because of awk's concatenation rules and the way awk.y:yylex()
+ * collects a number, this routine has to be willing to stop on the
+ * first invalid character.
+ *
+ * str points at len characters of text.  A leading "0x" or "0X" selects
+ * hexadecimal, any other leading '0' selects octal, and everything else
+ * is parsed as decimal with atof().  The decimal path temporarily
+ * overwrites str[len], so that byte must be writable.
+ */
+
+AWKNUM
+nondec2awknum(str, len)
+char *str;
+size_t len;
+{
+    AWKNUM retval = 0.0;
+    char save;
+    int val;
+
+    if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) {
+        assert(len > 2);
+
+        for (str += 2, len -= 2; len > 0; len--, str++) {
+            switch (*str) {
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+                val = *str - '0';
+                break;
+            case 'a':
+            case 'b':
+            case 'c':
+            case 'd':
+            case 'e':
+            case 'f':    /* 'f' was missing, so 0xff stopped at the first f */
+                val = *str - 'a' + 10;
+                break;
+            case 'A':
+            case 'B':
+            case 'C':
+            case 'D':
+            case 'E':
+            case 'F':
+                val = *str - 'A' + 10;
+                break;
+            default:
+                goto done;
+            }
+            retval = (retval * 16) + val;
+        }
+    } else if (*str == '0') {
+        for (; len > 0; len--) {
+            /* cast: passing a negative char to isdigit() is undefined */
+            if (! isdigit((unsigned char) *str) || *str == '8' || *str == '9')
+                goto done;
+            retval = (retval * 8) + (*str - '0');
+            str++;
+        }
+    } else {
+        /*
+         * Terminate the text at len for the duration of the conversion
+         * so atof() cannot run on into whatever follows, then put the
+         * original byte back.
+         */
+        save = str[len];
+        str[len] = '\0';
+        retval = atof(str);
+        str[len] = save;
+    }
+done:
+    return retval;
+}
+#endif /* defined(BITOPS) || defined(NONDECDATA) */
diff --git a/contrib/awk/config.h b/contrib/awk/config.h
new file mode 100644
index 0000000..c745db1
--- /dev/null
+++ b/contrib/awk/config.h
@@ -0,0 +1,207 @@
+/* config.h. Generated automatically by configure. */
+/* configh.in. Generated automatically from configure.in by autoheader. */
+/*
+ * acconfig.h -- configuration definitions for gawk.
+ */
+
+/*
+ * Copyright (C) 1995-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/* Define if on AIX 3.
+ System headers sometimes define this.
+ We just want to avoid a redefinition error message. */
+#ifndef _ALL_SOURCE
+/* #undef _ALL_SOURCE */
+#endif
+
+/* Define if using alloca.c. */
+/* #undef C_ALLOCA */
+
+/* Define if type char is unsigned and you are not using gcc. */
+#ifndef __CHAR_UNSIGNED__
+/* #undef __CHAR_UNSIGNED__ */
+#endif
+
+/* Define to empty if the keyword does not work. */
+/* #undef const */
+
+/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
+ This function is required for alloca.c support on those systems. */
+/* #undef CRAY_STACKSEG_END */
+
+/* Define to the type of elements in the array set by `getgroups'.
+ Usually this is either `int' or `gid_t'. */
+#define GETGROUPS_T gid_t
+
+/* Define if the `getpgrp' function takes no argument. */
+#define GETPGRP_VOID 1
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef gid_t */
+
+/* Define if you have alloca, as a function or macro. */
+#define HAVE_ALLOCA 1
+
+/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
+/* #undef HAVE_ALLOCA_H */
+
+/* Define if you don't have vprintf but do have _doprnt. */
+/* #undef HAVE_DOPRNT */
+
+/* Define if you have a working `mmap' system call. */
+#define HAVE_MMAP 1
+
+/* Define if your struct stat has st_blksize. */
+#define HAVE_ST_BLKSIZE 1
+
+/* Define if you have <sys/wait.h> that is POSIX.1 compatible. */
+#define HAVE_SYS_WAIT_H 1
+
+/* Define if your struct tm has tm_zone. */
+#define HAVE_TM_ZONE 1
+
+/* Define if you don't have tm_zone but do have the external array
+ tzname. */
+/* #undef HAVE_TZNAME */
+
+/* Define if you have the vprintf function. */
+#define HAVE_VPRINTF 1
+
+/* Define if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef pid_t */
+
+/* Define if the system does not provide POSIX.1 features except
+ with this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define if you need to in order for stat and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Define as the return type of signal handlers (int or void). */
+#define RETSIGTYPE void
+
+/* Define to `unsigned' if <sys/types.h> doesn't define. */
+/* #undef size_t */
+
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
+ */
+/* #undef STACK_DIRECTION */
+
+/* Define if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define if you can safely include both <sys/time.h> and <time.h>. */
+#define TIME_WITH_SYS_TIME 1
+
+/* Define if your <sys/time.h> declares struct tm. */
+/* #undef TM_IN_SYS_TIME */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef uid_t */
+
+#define HAVE_STRINGIZE 1 /* can use ANSI # operator in cpp */
+#define REGEX_MALLOC 1 /* use malloc instead of alloca in regex.c */
+#define SPRINTF_RET int /* return type of sprintf */
+/* #undef BITOPS */ /* bitwise ops (undocumented feature) */
+/* #undef NONDECDATA */ /* non-decimal input data (undocumented feature) */
+
+/* Define if you have the fmod function. */
+#define HAVE_FMOD 1
+
+/* Define if you have the getpagesize function. */
+#define HAVE_GETPAGESIZE 1
+
+/* Define if you have the madvise function. */
+#define HAVE_MADVISE 1
+
+/* Define if you have the memcmp function. */
+#define HAVE_MEMCMP 1
+
+/* Define if you have the memcpy function. */
+#define HAVE_MEMCPY 1
+
+/* Define if you have the memset function. */
+#define HAVE_MEMSET 1
+
+/* Define if you have the setlocale function. */
+#define HAVE_SETLOCALE 1
+
+/* Define if you have the strchr function. */
+#define HAVE_STRCHR 1
+
+/* Define if you have the strerror function. */
+#define HAVE_STRERROR 1
+
+/* Define if you have the strftime function. */
+#define HAVE_STRFTIME 1
+
+/* Define if you have the strncasecmp function. */
+#define HAVE_STRNCASECMP 1
+
+/* Define if you have the strtod function. */
+#define HAVE_STRTOD 1
+
+/* Define if you have the system function. */
+#define HAVE_SYSTEM 1
+
+/* Define if you have the tzset function. */
+#define HAVE_TZSET 1
+
+/* Define if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define if you have the <locale.h> header file. */
+#define HAVE_LOCALE_H 1
+
+/* Define if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define if you have the <signum.h> header file. */
+/* #undef HAVE_SIGNUM_H */
+
+/* Define if you have the <stdarg.h> header file. */
+#define HAVE_STDARG_H 1
+
+/* Define if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define if you have the <strings.h> header file. */
+/* #undef HAVE_STRINGS_H */
+
+/* Define if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define if you have the m library (-lm). */
+#define HAVE_LIBM 1
+
+#include <custom.h> /* overrides for stuff autoconf can't deal with */
diff --git a/contrib/awk/custom.h b/contrib/awk/custom.h
new file mode 100644
index 0000000..833bb62
--- /dev/null
+++ b/contrib/awk/custom.h
@@ -0,0 +1,59 @@
+/*
+ * custom.h
+ *
+ * This file is for use on systems where Autoconf isn't quite able to
+ * get things right. It is included after config.h in awk.h, to override
+ * definitions from Autoconf that are erroneous. See the manual for more
+ * information.
+ *
+ * If you make additions to this file for your system, please send me
+ * the information, to arnold@gnu.ai.mit.edu.
+ */
+
+/*
+ * Copyright (C) 1995-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* for MIPS RiscOS, from Nelson H. F. Beebe, beebe@math.utah.edu:
+   withdraw HAVE_STRTOD and HAVE_STRERROR even if config.h defined them */
+#if defined(__host_mips) && defined(SYSTYPE_BSD43)
+#undef HAVE_STRTOD
+#undef HAVE_STRERROR
+#endif
+
+/* for VMS POSIX, from Pat Rankin, rankin@eql.caltech.edu:
+   drop the VMS macro and pull in vms/redirect.h instead */
+#ifdef VMS_POSIX
+#undef VMS
+#include "vms/redirect.h"
+#endif
+
+/* For QNX, based on submission from Michael Hunter, mphunter@qnx.com:
+   force GETPGRP_VOID on regardless of what configure decided */
+#ifdef __QNX__
+#define GETPGRP_VOID 1
+#endif
+
+/* For Amigas, from Fred Fish, fnf@ninemoons.com:
+   every later use of the identifier fork expands to vfork */
+#ifdef __amigaos__
+#define fork vfork
+#endif
+
+/* For sequent, based on email with Aron Griffis <agriffis@calypso.coat.com>:
+   withdraw HAVE_MMAP even if config.h defined it */
+#ifdef _SEQUENT_
+#undef HAVE_MMAP
+#endif
diff --git a/contrib/awk/dfa.c b/contrib/awk/dfa.c
new file mode 100644
index 0000000..03dd9e7
--- /dev/null
+++ b/contrib/awk/dfa.c
@@ -0,0 +1,2606 @@
+/* dfa.c - deterministic extended regexp routines for GNU
+ Copyright (C) 1988 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
+
+/* Written June, 1988 by Mike Haertel
+ Modified July, 1988 by Arthur David Olson to assist BMG speedups */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+#include <sys/types.h>
+extern char *calloc(), *malloc(), *realloc();
+extern void free();
+#endif
+
+#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
+#include <string.h>
+#undef index
+#define index strchr
+#else
+#include <strings.h>
+#endif
+
+#ifndef DEBUG /* use the same approach as regex.c */
+#undef assert
+#define assert(e)
+#endif /* DEBUG */
+
+#ifndef isgraph
+#define isgraph(C) (isprint(C) && !isspace(C))
+#endif
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISALPHA(C) isalpha(C)
+#define ISUPPER(C) isupper(C)
+#define ISLOWER(C) islower(C)
+#define ISDIGIT(C) isdigit(C)
+#define ISXDIGIT(C) isxdigit(C)
+#define ISSPACE(C) isspace(C)
+#define ISPUNCT(C) ispunct(C)
+#define ISALNUM(C) isalnum(C)
+#define ISPRINT(C) isprint(C)
+#define ISGRAPH(C) isgraph(C)
+#define ISCNTRL(C) iscntrl(C)
+#else
+#define ISALPHA(C) (isascii(C) && isalpha(C))
+#define ISUPPER(C) (isascii(C) && isupper(C))
+#define ISLOWER(C) (isascii(C) && islower(C))
+#define ISDIGIT(C) (isascii(C) && isdigit(C))
+#define ISXDIGIT(C) (isascii(C) && isxdigit(C))
+#define ISSPACE(C) (isascii(C) && isspace(C))
+#define ISPUNCT(C) (isascii(C) && ispunct(C))
+#define ISALNUM(C) (isascii(C) && isalnum(C))
+#define ISPRINT(C) (isascii(C) && isprint(C))
+#define ISGRAPH(C) (isascii(C) && isgraph(C))
+#define ISCNTRL(C) (isascii(C) && iscntrl(C))
+#endif
+
+#include "regex.h"
+#include "dfa.h"
+
+#ifdef __STDC__
+typedef void *ptr_t;
+#else
+typedef char *ptr_t;
+#ifndef const
+#define const
+#endif
+#endif
+
+static void dfamust _RE_ARGS((struct dfa *dfa));
+
+static ptr_t xcalloc _RE_ARGS((size_t n, size_t s));
+static ptr_t xmalloc _RE_ARGS((size_t n));
+static ptr_t xrealloc _RE_ARGS((ptr_t p, size_t n));
+#ifdef DEBUG
+static void prtok _RE_ARGS((token t));
+#endif
+static int tstbit _RE_ARGS((int b, charclass c));
+static void setbit _RE_ARGS((int b, charclass c));
+static void clrbit _RE_ARGS((int b, charclass c));
+static void copyset _RE_ARGS((charclass src, charclass dst));
+static void zeroset _RE_ARGS((charclass s));
+static void notset _RE_ARGS((charclass s));
+static int equal _RE_ARGS((charclass s1, charclass s2));
+static int charclass_index _RE_ARGS((charclass s));
+static int looking_at _RE_ARGS((const char *s));
+static token lex _RE_ARGS((void));
+static void addtok _RE_ARGS((token t));
+static void atom _RE_ARGS((void));
+static int nsubtoks _RE_ARGS((int tindex));
+static void copytoks _RE_ARGS((int tindex, int ntokens));
+static void closure _RE_ARGS((void));
+static void branch _RE_ARGS((void));
+static void regexp _RE_ARGS((int toplevel));
+static void copy _RE_ARGS((position_set *src, position_set *dst));
+static void insert _RE_ARGS((position p, position_set *s));
+static void merge _RE_ARGS((position_set *s1, position_set *s2, position_set *m));
+static void delete _RE_ARGS((position p, position_set *s));
+static int state_index _RE_ARGS((struct dfa *d, position_set *s,
+ int newline, int letter));
+static void build_state _RE_ARGS((int s, struct dfa *d));
+static void build_state_zero _RE_ARGS((struct dfa *d));
+static char *icatalloc _RE_ARGS((char *old, char *new));
+static char *icpyalloc _RE_ARGS((char *string));
+static char *istrstr _RE_ARGS((char *lookin, char *lookfor));
+static void ifree _RE_ARGS((char *cp));
+static void freelist _RE_ARGS((char **cpp));
+static char **enlist _RE_ARGS((char **cpp, char *new, size_t len));
+static char **comsubs _RE_ARGS((char *left, char *right));
+static char **addlists _RE_ARGS((char **old, char **new));
+static char **inboth _RE_ARGS((char **left, char **right));
+
+/* Allocate a zero-filled array of N elements of S bytes each with calloc;
+   report "Memory exhausted" through dfaerror when calloc returns null.  */
+static ptr_t
+xcalloc(n, s)
+     size_t n;
+     size_t s;
+{
+  ptr_t mem;
+
+  mem = calloc(n, s);
+  if (mem == 0)
+    dfaerror("Memory exhausted");
+  return mem;
+}
+
+/* Allocate N bytes with malloc; report "Memory exhausted" through dfaerror
+   when malloc returns null.  A zero N trips the assertion when assert is
+   active.  */
+static ptr_t
+xmalloc(n)
+     size_t n;
+{
+  ptr_t mem;
+
+  mem = malloc(n);
+  assert(n != 0);
+  if (mem == 0)
+    dfaerror("Memory exhausted");
+  return mem;
+}
+
+/* Resize the block P to N bytes with realloc; report "Memory exhausted"
+   through dfaerror when realloc returns null.  A zero N trips the
+   assertion when assert is active.  */
+static ptr_t
+xrealloc(p, n)
+     ptr_t p;
+     size_t n;
+{
+  ptr_t mem;
+
+  mem = realloc(p, n);
+  assert(n != 0);
+  if (mem == 0)
+    dfaerror("Memory exhausted");
+  return mem;
+}
+
+#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t))) /* p = zero-filled array of n objects of type t */
+#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t))) /* p = uninitialized array of n objects of type t */
+#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t))) /* resize p to hold n objects of type t */
+
+/* Reallocate an array of type t if nalloc is too small for index.
+   nalloc is doubled until it exceeds index, so it must be nonzero on
+   entry or the loop never ends.  p and nalloc are both updated in place.
+   The expansion is a bare if statement rather than a do { } while (0)
+   block, so take care when using it as the body of an unbraced if/else.  */
+#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
+ if ((index) >= (nalloc)) \
+ { \
+ while ((index) >= (nalloc)) \
+ (nalloc) *= 2; \
+ REALLOC(p, t, nalloc); \
+ }
+
+#ifdef DEBUG
+
+/* Print a readable form of token T on stderr (debugging aid).  Negative
+   tokens print as END, tokens below NOTCHAR as the character itself, named
+   operators by name, and anything else as CSET.  */
+static void
+prtok(t)
+     token t;
+{
+  const char *s;
+
+  if (t < 0)
+    fprintf(stderr, "END");
+  else if (t < NOTCHAR)
+    fprintf(stderr, "%c", t);
+  else
+    {
+      switch (t)
+        {
+        case EMPTY: s = "EMPTY"; break;
+        case BACKREF: s = "BACKREF"; break;
+        case BEGLINE: s = "BEGLINE"; break;
+        case ENDLINE: s = "ENDLINE"; break;
+        case BEGWORD: s = "BEGWORD"; break;
+        case ENDWORD: s = "ENDWORD"; break;
+        case LIMWORD: s = "LIMWORD"; break;
+        case NOTLIMWORD: s = "NOTLIMWORD"; break;
+        case QMARK: s = "QMARK"; break;
+        case STAR: s = "STAR"; break;
+        case PLUS: s = "PLUS"; break;
+        /* REPMN is produced by lex(); without its own case it fell into
+           the default and was reported as a character set.  */
+        case REPMN: s = "REPMN"; break;
+        case CAT: s = "CAT"; break;
+        case OR: s = "OR"; break;
+        case ORTOP: s = "ORTOP"; break;
+        case LPAREN: s = "LPAREN"; break;
+        case RPAREN: s = "RPAREN"; break;
+        default: s = "CSET"; break;
+        }
+      fprintf(stderr, "%s", s);
+    }
+}
+#endif /* DEBUG */
+
+/* Stuff pertaining to charclasses. */
+
+/* Return nonzero if bit B is set in charclass C.  The mask is built from
+   an unsigned 1 because shifting a signed 1 into the top bit of an int is
+   undefined.  */
+static int
+tstbit(b, c)
+     int b;
+     charclass c;
+{
+  return (int) (c[b / INTBITS] & ((unsigned) 1 << b % INTBITS));
+}
+
+/* Set bit B in charclass C.  The mask is built from an unsigned 1 because
+   shifting a signed 1 into the top bit of an int is undefined.  */
+static void
+setbit(b, c)
+     int b;
+     charclass c;
+{
+  c[b / INTBITS] |= (unsigned) 1 << b % INTBITS;
+}
+
+/* Clear bit B in charclass C.  The mask is built from an unsigned 1
+   because shifting a signed 1 into the top bit of an int is undefined.  */
+static void
+clrbit(b, c)
+     int b;
+     charclass c;
+{
+  c[b / INTBITS] &= ~((unsigned) 1 << b % INTBITS);
+}
+
+/* Copy all CHARCLASS_INTS words of SRC into DST.  */
+static void
+copyset(src, dst)
+     charclass src;
+     charclass dst;
+{
+  int word = 0;
+
+  while (word < CHARCLASS_INTS)
+    {
+      dst[word] = src[word];
+      ++word;
+    }
+}
+
+/* Empty the set S by clearing each of its CHARCLASS_INTS words.  */
+static void
+zeroset(s)
+     charclass s;
+{
+  int word = 0;
+
+  while (word < CHARCLASS_INTS)
+    s[word++] = 0;
+}
+
+/* Complement the set S in place, one word at a time.  */
+static void
+notset(s)
+     charclass s;
+{
+  int word = 0;
+
+  while (word < CHARCLASS_INTS)
+    {
+      s[word] = ~s[word];
+      ++word;
+    }
+}
+
+/* Return 1 if S1 and S2 hold the same CHARCLASS_INTS words, else 0.  */
+static int
+equal(s1, s2)
+     charclass s1;
+     charclass s2;
+{
+  int word = 0;
+
+  /* advance past the matching prefix; reaching the end means equality */
+  while (word < CHARCLASS_INTS && s1[word] == s2[word])
+    ++word;
+  return word == CHARCLASS_INTS;
+}
+
+/* A pointer to the current dfa is kept here during parsing. */
+static struct dfa *dfa;
+
+/* Return the index in dfa->charclasses of a set equal to S.  When no such
+   set is stored yet, grow the array if needed, append a copy of S and
+   return the index of the new entry.  */
+static int
+charclass_index(s)
+     charclass s;
+{
+  int slot = 0;
+
+  while (slot < dfa->cindex && !equal(s, dfa->charclasses[slot]))
+    ++slot;
+  if (slot < dfa->cindex)
+    return slot;
+
+  /* not found: slot == dfa->cindex, the first unused position */
+  REALLOC_IF_NECESSARY(dfa->charclasses, charclass, dfa->calloc, dfa->cindex);
+  ++dfa->cindex;
+  copyset(s, dfa->charclasses[slot]);
+  return slot;
+}
+
+/* Syntax bits controlling the behavior of the lexical analyzer. */
+static reg_syntax_t syntax_bits, syntax_bits_set;
+
+/* Flag for case-folding letters into sets. */
+static int case_fold;
+
+/* Entry point to set syntax options: record the regex syntax BITS and the
+   case-folding flag FOLD for the lexer, and note that they have been set.  */
+void
+dfasyntax(bits, fold)
+     reg_syntax_t bits;
+     int fold;
+{
+  case_fold = fold;
+  syntax_bits = bits;
+  syntax_bits_set = 1;
+}
+
+/* Lexical analyzer. All the dross that deals with the obnoxious
+ GNU Regex syntax bits is located here. The poor, suffering
+ reader is referred to the GNU Regex documentation for the
+ meaning of the @#%!@#%^!@ syntax bits. */
+
+static char *lexstart; /* Pointer to beginning of input string. */
+static char *lexptr; /* Pointer to next input character. */
+static int lexleft; /* Number of characters remaining. */
+static token lasttok; /* Previous token returned; initially END. */
+static int laststart; /* True if we're separated from beginning or (, |
+ only by zero-width characters. */
+static int parens; /* Count of outstanding left parens. */
+static int minrep, maxrep; /* Repeat counts for {m,n}. */
+
+/* Fetch the next input character into c, advancing lexptr and decrementing
+   lexleft.  When no input is left, dfaerror(eoferr) is called if eoferr is
+   nonzero; otherwise the enclosing function returns END, which is also
+   stored in lasttok.  Because of that embedded return the macro can only
+   be used inside a function returning a token.
+   Note that characters become unsigned here. */
+#define FETCH(c, eoferr) \
+ { \
+ if (! lexleft) \
+ if (eoferr != 0) \
+ dfaerror(eoferr); \
+ else \
+ return lasttok = END; \
+ (c) = (unsigned char) *lexptr++; \
+ --lexleft; \
+ }
+
+#ifdef __STDC__ /* FUNC(F, P) defines a static function F(c) that returns
+   P(c), giving each ISxxx classification macro an address that can be
+   stored in the prednames table below; the two variants differ only in
+   prototype versus old-style parameter syntax */
+#define FUNC(F, P) static int F(int c) { return P(c); }
+#else
+#define FUNC(F, P) static int F(c) int c; { return P(c); }
+#endif /* __STDC__ */
+
+FUNC(is_alpha, ISALPHA)
+FUNC(is_upper, ISUPPER)
+FUNC(is_lower, ISLOWER)
+FUNC(is_digit, ISDIGIT)
+FUNC(is_xdigit, ISXDIGIT)
+FUNC(is_space, ISSPACE)
+FUNC(is_punct, ISPUNCT)
+FUNC(is_alnum, ISALNUM)
+FUNC(is_print, ISPRINT)
+FUNC(is_graph, ISGRAPH)
+FUNC(is_cntrl, ISCNTRL)
+
+/* Predicate for the [:blank:] class: 1 for a space or a tab, else 0.  */
+static int is_blank(c)
+int c;
+{
+  switch (c)
+    {
+    case ' ':
+    case '\t':
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+/* The following list maps the names of the Posix named character classes
+ to predicate functions that determine whether a given character is in
+ the class. The leading [ has already been eaten by the lexical analyzer.
+ Each name therefore starts at the ':' and includes the closing ":]".
+ The list ends with an entry whose name is null, which is what the
+ scanning loop in lex() tests for. */
+static struct {
+ const char *name;
+ int (*pred) _RE_ARGS((int));
+} prednames[] = {
+ { ":alpha:]", is_alpha },
+ { ":upper:]", is_upper },
+ { ":lower:]", is_lower },
+ { ":digit:]", is_digit },
+ { ":xdigit:]", is_xdigit },
+ { ":space:]", is_space },
+ { ":punct:]", is_punct },
+ { ":alnum:]", is_alnum },
+ { ":print:]", is_print },
+ { ":graph:]", is_graph },
+ { ":cntrl:]", is_cntrl },
+ { ":blank:]", is_blank },
+ { 0 }
+};
+
+/* Return nonzero if the unread input (lexleft characters at lexptr)
+   begins with the string S.  */
+static int
+looking_at(s)
+     const char *s;
+{
+  size_t need = strlen(s);
+
+  /* too little input left cannot match; otherwise compare the prefix */
+  return !(lexleft < need) && strncmp(s, lexptr, need) == 0;
+}
+
+static token
+lex()
+{
+ token c, c1, c2;
+ int backslash = 0, invert;
+ charclass ccl;
+ int i;
+
+ /* Return the next token of the pattern held in lexptr/lexleft: an
+ operator token, CSET plus a charclass index, or the character itself.
+ The syntax_bits flags decide which characters act as operators.
+
+ Basic plan: We fetch a character. If it's a backslash,
+ we set the backslash flag and go through the loop again.
+ On the plus side, this avoids having a duplicate of the
+ main switch inside the backslash case. On the minus side,
+ it means that just about every case begins with
+ "if (backslash) ...". */
+ for (i = 0; i < 2; ++i) /* at most two passes: a backslash, then the character after it */
+ {
+ FETCH(c, 0); /* with a zero eoferr, running out of input returns END */
+ switch (c)
+ {
+ case '\\':
+ if (backslash)
+ goto normal_char;
+ if (lexleft == 0)
+ dfaerror("Unfinished \\ escape");
+ backslash = 1;
+ break;
+
+ case '^':
+ if (backslash)
+ goto normal_char;
+ if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || lasttok == END
+ || lasttok == LPAREN
+ || lasttok == OR)
+ return lasttok = BEGLINE;
+ goto normal_char;
+
+ case '$':
+ if (backslash)
+ goto normal_char;
+ if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || lexleft == 0
+ || (syntax_bits & RE_NO_BK_PARENS
+ ? lexleft > 0 && *lexptr == ')'
+ : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
+ || (syntax_bits & RE_NO_BK_VBAR
+ ? lexleft > 0 && *lexptr == '|'
+ : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
+ || ((syntax_bits & RE_NEWLINE_ALT)
+ && lexleft > 0 && *lexptr == '\n'))
+ return lasttok = ENDLINE;
+ goto normal_char;
+
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (backslash && !(syntax_bits & RE_NO_BK_REFS))
+ {
+ laststart = 0;
+ return lasttok = BACKREF;
+ }
+ goto normal_char;
+
+ case '`':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = BEGLINE; /* FIXME: should be beginning of string */
+ goto normal_char;
+
+ case '\'':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = ENDLINE; /* FIXME: should be end of string */
+ goto normal_char;
+
+ case '<':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = BEGWORD;
+ goto normal_char;
+
+ case '>':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = ENDWORD;
+ goto normal_char;
+
+ case 'b':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = LIMWORD;
+ goto normal_char;
+
+ case 'B':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = NOTLIMWORD;
+ goto normal_char;
+
+ case '?':
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = QMARK;
+
+ case '*':
+ if (backslash)
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = STAR;
+
+ case '+':
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = PLUS;
+
+ case '{':
+ if (!(syntax_bits & RE_INTERVALS))
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
+ goto normal_char;
+ minrep = maxrep = 0; /* closure() reads maxrep == 0 as "no upper bound" */
+ /* Cases:
+ {M} - exact count
+ {M,} - minimum count, maximum is infinity
+ {,M} - 0 through M
+ {M,N} - M through N */
+ FETCH(c, "unfinished repeat count");
+ if (ISDIGIT(c))
+ {
+ minrep = c - '0';
+ for (;;)
+ {
+ FETCH(c, "unfinished repeat count");
+ if (!ISDIGIT(c))
+ break;
+ minrep = 10 * minrep + c - '0';
+ }
+ }
+ else if (c != ',')
+ dfaerror("malformed repeat count");
+ if (c == ',')
+ for (;;)
+ {
+ FETCH(c, "unfinished repeat count");
+ if (!ISDIGIT(c))
+ break;
+ maxrep = 10 * maxrep + c - '0';
+ }
+ else
+ maxrep = minrep;
+ if (!(syntax_bits & RE_NO_BK_BRACES))
+ {
+ if (c != '\\')
+ dfaerror("malformed repeat count");
+ FETCH(c, "unfinished repeat count");
+ }
+ if (c != '}')
+ dfaerror("malformed repeat count");
+ laststart = 0;
+ return lasttok = REPMN;
+
+ case '|':
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
+ goto normal_char;
+ laststart = 1;
+ return lasttok = OR;
+
+ case '\n':
+ if (syntax_bits & RE_LIMITED_OPS
+ || backslash
+ || !(syntax_bits & RE_NEWLINE_ALT))
+ goto normal_char;
+ laststart = 1;
+ return lasttok = OR;
+
+ case '(':
+ if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ goto normal_char;
+ ++parens;
+ laststart = 1;
+ return lasttok = LPAREN;
+
+ case ')':
+ if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ goto normal_char;
+ if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ --parens;
+ laststart = 0;
+ return lasttok = RPAREN;
+
+ case '.':
+ if (backslash)
+ goto normal_char;
+ zeroset(ccl);
+ notset(ccl); /* start from the full set, then remove the exclusions */
+ if (!(syntax_bits & RE_DOT_NEWLINE))
+ clrbit('\n', ccl);
+ if (syntax_bits & RE_DOT_NOT_NULL)
+ clrbit('\0', ccl);
+ laststart = 0;
+ return lasttok = CSET + charclass_index(ccl);
+
+ case 'w':
+ case 'W':
+ if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ goto normal_char;
+ zeroset(ccl);
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if (ISALNUM(c2))
+ setbit(c2, ccl);
+ setbit('_', ccl);
+ if (c == 'W')
+ notset(ccl);
+ laststart = 0;
+ return lasttok = CSET + charclass_index(ccl);
+
+ case '[':
+ if (backslash)
+ goto normal_char;
+ zeroset(ccl);
+ FETCH(c, "Unbalanced [");
+ if (c == '^')
+ {
+ FETCH(c, "Unbalanced [");
+ invert = 1;
+ }
+ else
+ invert = 0;
+ do
+ {
+ /* Nobody ever said this had to be fast. :-)
+ Note that if we're looking at some other [:...:]
+ construct, we just treat it as a bunch of ordinary
+ characters. We can do this because we assume
+ regex has checked for syntax errors before
+ dfa is ever called. */
+ if (c == '[' && (syntax_bits & RE_CHAR_CLASSES))
+ for (c1 = 0; prednames[c1].name; ++c1)
+ if (looking_at(prednames[c1].name))
+ {
+ int (*pred)() = prednames[c1].pred;
+ if (case_fold
+ && (pred == is_upper || pred == is_lower))
+ pred = is_alpha;
+
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if ((*pred)(c2))
+ setbit(c2, ccl);
+ lexptr += strlen(prednames[c1].name);
+ lexleft -= strlen(prednames[c1].name);
+ FETCH(c1, "Unbalanced ["); /* c1 stops being the table index and becomes the lookahead */
+ goto skip;
+ }
+ if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH(c, "Unbalanced [");
+ FETCH(c1, "Unbalanced [");
+ if (c1 == '-')
+ {
+ FETCH(c2, "Unbalanced [");
+ if (c2 == ']')
+ {
+ /* In the case [x-], the - is an ordinary hyphen,
+ which is left in c1, the lookahead character. */
+ --lexptr; /* un-read the ']' so a later FETCH sees it again */
+ ++lexleft;
+ c2 = c;
+ }
+ else
+ {
+ if (c2 == '\\'
+ && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH(c2, "Unbalanced [");
+ FETCH(c1, "Unbalanced [");
+ }
+ }
+ else
+ c2 = c;
+ while (c <= c2) /* add every character of the range c..c2 */
+ {
+ setbit(c, ccl);
+ if (case_fold)
+ if (ISUPPER(c))
+ setbit(tolower(c), ccl);
+ else if (ISLOWER(c))
+ setbit(toupper(c), ccl);
+ ++c;
+ }
+ skip:
+ ;
+ }
+ while ((c = c1) != ']'); /* the lookahead becomes the current character */
+ if (invert)
+ {
+ notset(ccl);
+ if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
+ clrbit('\n', ccl);
+ }
+ laststart = 0;
+ return lasttok = CSET + charclass_index(ccl);
+
+ default:
+ normal_char:
+ laststart = 0;
+ if (case_fold && ISALPHA(c))
+ {
+ zeroset(ccl);
+ setbit(c, ccl);
+ if (isupper(c))
+ setbit(tolower(c), ccl);
+ else
+ setbit(toupper(c), ccl);
+ return lasttok = CSET + charclass_index(ccl);
+ }
+ return c; /* NOTE(review): unlike every other return, lasttok is not updated here -- confirm this is intended */
+ }
+ }
+
+ /* The above loop should consume at most a backslash
+ and some other character. */
+ abort();
+ return END; /* keeps pedantic compilers happy. */
+}
+
+/* Recursive descent parser for regular expressions. */
+
+static token tok; /* Lookahead token. */
+static int depth; /* Current depth of a hypothetical stack
+ holding deferred productions. This is
+ used to determine the depth that will be
+ required of the real stack later on in
+ dfaanalyze(). */
+
+/* Append token T to the postfix parse tree in dfa->tokens, growing the
+   array when needed.  The running stack depth is adjusted by the effect T
+   has on an evaluation stack, the leaf count is bumped for operands, and
+   dfa->depth keeps the largest depth seen so far.  */
+static void
+addtok(t)
+     token t;
+{
+  REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
+  dfa->tokens[dfa->tindex++] = t;
+
+  if (t == QMARK || t == STAR || t == PLUS)
+    {
+      /* unary operators replace their operand: depth unchanged */
+    }
+  else if (t == CAT || t == OR || t == ORTOP)
+    {
+      /* binary operators fold two entries into one */
+      --depth;
+    }
+  else
+    {
+      /* operands push one entry; all of them except EMPTY are leaves */
+      if (t != EMPTY)
+        ++dfa->nleaves;
+      ++depth;
+    }
+
+  if (dfa->depth < depth)
+    dfa->depth = depth;
+}
+
+/* The grammar understood by the parser is as follows.
+
+ regexp:
+ regexp OR branch
+ branch
+
+ branch:
+ branch closure
+ closure
+
+ closure:
+ closure QMARK
+ closure STAR
+ closure PLUS
+ atom
+
+ atom:
+ <normal character>
+ CSET
+ BACKREF
+ BEGLINE
+ ENDLINE
+ BEGWORD
+ ENDWORD
+ LIMWORD
+ NOTLIMWORD
+ <empty>
+
+ The parser builds a parse tree in postfix form in an array of tokens. */
+
+/* Parse one atom.  A leaf token (ordinary character, CSET, BACKREF or one
+   of the line/word anchors) is emitted and the lookahead advanced.  A
+   parenthesized group is parsed recursively as a non-toplevel regexp.
+   Anything else leaves the lookahead alone and emits EMPTY. */
+static void
+atom()
+{
+  int is_leaf;
+
+  switch (tok)
+    {
+    case BACKREF:
+    case BEGLINE:
+    case ENDLINE:
+    case BEGWORD:
+    case ENDWORD:
+    case LIMWORD:
+    case NOTLIMWORD:
+      is_leaf = 1;
+      break;
+
+    default:
+      is_leaf = (tok >= 0 && tok < NOTCHAR) || tok >= CSET;
+      break;
+    }
+
+  if (is_leaf)
+    {
+      addtok(tok);
+      tok = lex();
+      return;
+    }
+
+  if (tok != LPAREN)
+    {
+      addtok(EMPTY);
+      return;
+    }
+
+  /* Parenthesized subexpression. */
+  tok = lex();
+  regexp(0);
+  if (tok != RPAREN)
+    dfaerror("Unbalanced (");
+  tok = lex();
+}
+
+/* Return the number of tokens making up the postfix subexpression whose
+   last token sits at index TINDEX - 1 of dfa->tokens.  A unary operator
+   contributes itself plus its operand; a binary operator contributes
+   itself plus both operands (the right one is measured first, since it
+   lies immediately before the operator); anything else counts as one. */
+static int
+nsubtoks(tindex)
+     int tindex;
+{
+  token top = dfa->tokens[tindex - 1];
+  int right;
+
+  if (top == QMARK || top == STAR || top == PLUS)
+    return 1 + nsubtoks(tindex - 1);
+
+  if (top == CAT || top == OR || top == ORTOP)
+    {
+      right = nsubtoks(tindex - 1);
+      return 1 + right + nsubtoks(tindex - 1 - right);
+    }
+
+  return 1;
+}
+
+/* Re-emit, through addtok(), the NTOKENS tokens that start at index TINDEX
+   of dfa->tokens, so that a copy of that subexpression ends up at the top
+   of the tree.  Each source token is fetched afresh before its addtok()
+   call, so the token array may be reallocated while copying. */
+static void
+copytoks(tindex, ntokens)
+     int tindex, ntokens;
+{
+  int src = tindex;
+  int remaining = ntokens;
+
+  while (remaining-- > 0)
+    addtok(dfa->tokens[src++]);
+}
+
+/* Parse an atom followed by any number of postfix operators.  QMARK, STAR
+   and PLUS are emitted as they are.  An interval (REPMN) is expanded in
+   place using the bounds left in minrep and maxrep: the operand already
+   emitted serves as the first copy, and further copies are appended with
+   CAT, the ones beyond minrep being made optional with QMARK.
+   NOTE(review): maxrep == 0 adds PLUS, which presumably encodes "no upper
+   bound"; confirm against the REPMN case of lex(). */
+static void
+closure()
+{
+  int start, len, rep;
+
+  atom();
+  while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
+    {
+      if (tok != REPMN)
+        addtok(tok);
+      else
+        {
+          /* Locate the operand that was just emitted. */
+          len = nsubtoks(dfa->tindex);
+          start = dfa->tindex - len;
+
+          if (maxrep == 0)
+            addtok(PLUS);
+          if (minrep == 0)
+            addtok(QMARK);
+
+          /* Copies below minrep are mandatory, the rest optional. */
+          for (rep = 1; rep < minrep || rep < maxrep; ++rep)
+            {
+              copytoks(start, len);
+              if (rep >= minrep)
+                addtok(QMARK);
+              addtok(CAT);
+            }
+        }
+      tok = lex();
+    }
+}
+
+/* Parse a sequence of closures and join them left to right with CAT.  The
+   sequence stops at RPAREN, at OR, or at any negative token. */
+static void
+branch()
+{
+  closure();
+  for (;;)
+    {
+      if (tok == RPAREN || tok == OR || tok < 0)
+        break;
+      closure();
+      addtok(CAT);
+    }
+}
+
+/* Parse one or more branches separated by OR.  Alternatives are joined
+   with ORTOP when TOPLEVEL is nonzero and with OR otherwise. */
+static void
+regexp(toplevel)
+     int toplevel;
+{
+  token join = toplevel ? ORTOP : OR;
+
+  branch();
+  while (tok == OR)
+    {
+      tok = lex();
+      branch();
+      addtok(join);
+    }
+}
+
+/* Main entry point for the parser.  S points to LEN bytes of pattern text,
+   which may therefore contain NUL characters, and D is the struct dfa that
+   receives the tokens.  Each call appends one more regexp to D: the parsed
+   expression is terminated by its own end marker (END - D->nregexps) and a
+   CAT, and from the second regexp on it is joined to the earlier ones with
+   ORTOP.  dfaerror() is called if no syntax has been selected or if input
+   is left over once the top-level expression has been parsed. */
+void
+dfaparse(s, len, d)
+     char *s;
+     size_t len;
+     struct dfa *d;
+
+{
+  /* Reset the lexer state shared with lex(). */
+  dfa = d;
+  lexptr = s;
+  lexstart = s;
+  lexleft = len;
+  lasttok = END;
+  laststart = 1;
+  parens = 0;
+
+  if (! syntax_bits_set)
+    dfaerror("No syntax specified");
+
+  /* Prime the lookahead and resume the depth count where D left off. */
+  tok = lex();
+  depth = d->depth;
+
+  regexp(1);
+
+  if (tok != END)
+    dfaerror("Unbalanced )");
+
+  addtok(END - d->nregexps);
+  addtok(CAT);
+
+  if (d->nregexps != 0)
+    addtok(ORTOP);
+
+  d->nregexps += 1;
+}
+
+/* Some primitives for operating on sets of positions. */
+
+/* Copy the elements and the element count of SRC into DST.  DST->elems
+   must already have room for SRC->nelem positions. */
+static void
+copy(src, dst)
+     position_set *src;
+     position_set *dst;
+{
+  int k = 0;
+
+  while (k < src->nelem)
+    {
+      dst->elems[k] = src->elems[k];
+      ++k;
+    }
+  dst->nelem = src->nelem;
+}
+
+/* Insert position P into set S.  The scan below skips every element whose
+   index is greater than P's, so sets are kept in descending order of
+   index.  If an element with the same index is already present, P's
+   constraint is or'd into it and the set does not grow.  S->elems must
+   have room for one more element. */
+static void
+insert(p, s)
+     position p;
+     position_set *s;
+{
+  int slot = 0;
+  position carry, displaced;
+
+  while (slot < s->nelem && p.index < s->elems[slot].index)
+    ++slot;
+
+  if (slot < s->nelem && p.index == s->elems[slot].index)
+    {
+      s->elems[slot].constraint |= p.constraint;
+      return;
+    }
+
+  /* Drop P into its slot and ripple the displaced elements up by one. */
+  ++s->nelem;
+  for (carry = p; slot < s->nelem; ++slot)
+    {
+      displaced = s->elems[slot];
+      s->elems[slot] = carry;
+      carry = displaced;
+    }
+}
+
+/* Merge position sets S1 and S2 into M.  Both inputs are walked in step,
+   always taking the element with the larger index first; when both carry
+   the same index a single element is produced whose constraint is the or
+   of the two.  Whatever remains of either input is appended at the end.
+   M->elems must be large enough to hold the result. */
+static void
+merge(s1, s2, m)
+     position_set *s1;
+     position_set *s2;
+     position_set *m;
+{
+  int i1 = 0, i2 = 0;
+
+  m->nelem = 0;
+  for (;;)
+    {
+      if (i1 >= s1->nelem || i2 >= s2->nelem)
+        break;
+
+      if (s1->elems[i1].index > s2->elems[i2].index)
+        m->elems[m->nelem++] = s1->elems[i1++];
+      else if (s1->elems[i1].index < s2->elems[i2].index)
+        m->elems[m->nelem++] = s2->elems[i2++];
+      else
+        {
+          m->elems[m->nelem] = s1->elems[i1];
+          m->elems[m->nelem].constraint |= s2->elems[i2].constraint;
+          ++m->nelem;
+          ++i1;
+          ++i2;
+        }
+    }
+
+  for (; i1 < s1->nelem; ++i1)
+    m->elems[m->nelem++] = s1->elems[i1];
+  for (; i2 < s2->nelem; ++i2)
+    m->elems[m->nelem++] = s2->elems[i2];
+}
+
+/* Remove from S the first element whose index equals P's index, closing
+   the gap by moving the later elements down one slot.  Only the index is
+   compared; the constraint of P is ignored.  S is left untouched when no
+   element matches. */
+static void
+delete(p, s)
+     position p;
+     position_set *s;
+{
+  int at = 0;
+
+  while (at < s->nelem && s->elems[at].index != p.index)
+    ++at;
+
+  if (at >= s->nelem)
+    return;
+
+  --s->nelem;
+  while (at < s->nelem)
+    {
+      s->elems[at] = s->elems[at + 1];
+      ++at;
+    }
+}
+
+/* Find the index of the state corresponding to the given position set with
+ the given preceding context, or create a new state if there is no such
+ state. Newline and letter tell whether we got here on a newline or
+ letter, respectively.
+
+ D is the dfa whose state table is searched and, if needed, extended. S is
+ the position set to look up; when a new state is created the elements of
+ S are copied into freshly allocated storage, so S itself is not retained.
+ NEWLINE and LETTER are normalized to 0 or 1 before being compared or
+ stored. The return value is an index into D->states. */
+static int
+state_index(d, s, newline, letter)
+ struct dfa *d;
+ position_set *s;
+ int newline;
+ int letter;
+{
+ int hash = 0;
+ int constraint;
+ int i, j;
+
+ newline = newline ? 1 : 0;
+ letter = letter ? 1 : 0;
+
+ for (i = 0; i < s->nelem; ++i) /* Cheap hash: xor of index + constraint. */
+ hash ^= s->elems[i].index + s->elems[i].constraint;
+
+ /* Try to find a state that exactly matches the proposed one. */
+ for (i = 0; i < d->sindex; ++i)
+ {
+ if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
+ || newline != d->states[i].newline || letter != d->states[i].letter)
+ continue; /* Quick rejection before the element-wise comparison. */
+ for (j = 0; j < s->nelem; ++j)
+ if (s->elems[j].constraint
+ != d->states[i].elems.elems[j].constraint
+ || s->elems[j].index != d->states[i].elems.elems[j].index)
+ break;
+ if (j == s->nelem) /* Every element compared equal. */
+ return i;
+ }
+
+ /* We'll have to create a new state. */
+ REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex); /* The
+ search loop above left i equal to d->sindex, which is the slot
+ filled in below. */
+ d->states[i].hash = hash;
+ MALLOC(d->states[i].elems.elems, position, s->nelem);
+ copy(s, &d->states[i].elems);
+ d->states[i].newline = newline;
+ d->states[i].letter = letter;
+ d->states[i].backref = 0;
+ d->states[i].constraint = 0;
+ d->states[i].first_end = 0;
+ for (j = 0; j < s->nelem; ++j)
+ if (d->tokens[s->elems[j].index] < 0) /* Presumably one of the end
+ markers (END - nregexps) that dfaparse() appends. */
+ {
+ constraint = s->elems[j].constraint;
+ if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
+ || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
+ || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
+ || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
+ d->states[i].constraint |= constraint;
+ if (! d->states[i].first_end) /* Remember only the first such token. */
+ d->states[i].first_end = d->tokens[s->elems[j].index];
+ }
+ else if (d->tokens[s->elems[j].index] == BACKREF)
+ {
+ d->states[i].constraint = NO_CONSTRAINT;
+ d->states[i].backref = 1;
+ }
+
+ ++d->sindex;
+
+ return i;
+}
+
+/* Find the epsilon closure of a set of positions. If any position of the set
+ contains a symbol that matches the empty string in some context, replace
+ that position with the elements of its follow labeled with an appropriate
+ constraint. Repeat exhaustively until no funny positions are left.
+ S->elems must be large enough to hold the result.
+
+ S is modified in place. D supplies the token array and the follow sets
+ that are consulted. A scratch array of D->tindex flags is allocated for
+ the duration of the call so that each special position is expanded at
+ most once. */
+static void epsclosure _RE_ARGS((position_set *s, struct dfa *d));
+
+static void
+epsclosure(s, d)
+ position_set *s;
+ struct dfa *d;
+{
+ int i, j;
+ int *visited;
+ position p, old;
+
+ MALLOC(visited, int, d->tindex);
+ for (i = 0; i < d->tindex; ++i)
+ visited[i] = 0;
+
+ for (i = 0; i < s->nelem; ++i)
+ if (d->tokens[s->elems[i].index] >= NOTCHAR
+ && d->tokens[s->elems[i].index] != BACKREF
+ && d->tokens[s->elems[i].index] < CSET) /* Neither a plain character,
+ nor a backreference, nor a character set. */
+ {
+ old = s->elems[i];
+ p.constraint = old.constraint;
+ delete(s->elems[i], s);
+ if (visited[old.index])
+ {
+ --i; /* delete() moved the next element into slot i; look at it. */
+ continue;
+ }
+ visited[old.index] = 1;
+ switch (d->tokens[old.index]) /* Narrow the inherited constraint. */
+ {
+ case BEGLINE:
+ p.constraint &= BEGLINE_CONSTRAINT;
+ break;
+ case ENDLINE:
+ p.constraint &= ENDLINE_CONSTRAINT;
+ break;
+ case BEGWORD:
+ p.constraint &= BEGWORD_CONSTRAINT;
+ break;
+ case ENDWORD:
+ p.constraint &= ENDWORD_CONSTRAINT;
+ break;
+ case LIMWORD:
+ p.constraint &= LIMWORD_CONSTRAINT;
+ break;
+ case NOTLIMWORD:
+ p.constraint &= NOTLIMWORD_CONSTRAINT;
+ break;
+ default:
+ break;
+ }
+ for (j = 0; j < d->follows[old.index].nelem; ++j)
+ {
+ p.index = d->follows[old.index].elems[j].index;
+ insert(p, s); /* Every follower gets the narrowed constraint. */
+ }
+ /* Force rescan to start at the beginning. */
+ i = -1;
+ }
+
+ free(visited);
+}
+
+/* Perform bottom-up analysis on the parse tree, computing various functions.
+ Note that at this point, we're pretending constructs like \< are real
+ characters rather than constraints on what can follow them.
+
+ Nullable: A node is nullable if it is at the root of a regexp that can
+ match the empty string.
+ * EMPTY leaves are nullable.
+ * No other leaf is nullable.
+ * A QMARK or STAR node is nullable.
+ * A PLUS node is nullable if its argument is nullable.
+ * A CAT node is nullable if both its arguments are nullable.
+ * An OR node is nullable if either argument is nullable.
+
+ Firstpos: The firstpos of a node is the set of positions (nonempty leaves)
+ that could correspond to the first character of a string matching the
+ regexp rooted at the given node.
+ * EMPTY leaves have empty firstpos.
+ * The firstpos of a nonempty leaf is that leaf itself.
+ * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its
+ argument.
+ * The firstpos of a CAT node is the firstpos of the left argument, union
+ the firstpos of the right if the left argument is nullable.
+ * The firstpos of an OR node is the union of firstpos of each argument.
+
+ Lastpos: The lastpos of a node is the set of positions that could
+ correspond to the last character of a string matching the regexp at
+ the given node.
+ * EMPTY leaves have empty lastpos.
+ * The lastpos of a nonempty leaf is that leaf itself.
+ * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its
+ argument.
+ * The lastpos of a CAT node is the lastpos of its right argument, union
+ the lastpos of the left if the right argument is nullable.
+ * The lastpos of an OR node is the union of the lastpos of each argument.
+
+ Follow: The follow of a position is the set of positions that could
+ correspond to the character following a character matching the node in
+ a string matching the regexp. At this point we consider special symbols
+ that match the empty string in some context to be just normal characters.
+ Later, if we find that a special symbol is in a follow set, we will
+ replace it with the elements of its follow, labeled with an appropriate
+ constraint.
+ * Every node in the firstpos of the argument of a STAR or PLUS node is in
+ the follow of every node in the lastpos.
+ * Every node in the firstpos of the second argument of a CAT node is in
+ the follow of every node in the lastpos of the first argument.
+
+ Because of the postfix representation of the parse tree, the depth-first
+ analysis is conveniently done by a linear scan with the aid of a stack.
+ Sets are stored as arrays of the elements, obeying a stack-like allocation
+ scheme; the number of elements in each set deeper in the stack can be
+ used to determine the address of a particular set's array. */
+void
+dfaanalyze(d, searchflag)
+ struct dfa *d; /* Parsed dfa; its follows and states tables are built here. */
+ int searchflag; /* Stored in d->searchflag; consulted later by dfastate(). */
+{
+ int *nullable; /* Nullable stack. */
+ int *nfirstpos; /* Element count stack for firstpos sets. */
+ position *firstpos; /* Array where firstpos elements are stored. */
+ int *nlastpos; /* Element count stack for lastpos sets. */
+ position *lastpos; /* Array where lastpos elements are stored. */
+ int *nalloc; /* Sizes of arrays allocated to follow sets. */
+ position_set tmp; /* Temporary set for merging sets. */
+ position_set merged; /* Result of merging sets. */
+ int wants_newline; /* True if some position wants newline info. */
+ int *o_nullable; /* The o_* variables keep the original allocation */
+ int *o_nfirst, *o_nlast; /* addresses so that they can be freed once */
+ position *o_firstpos, *o_lastpos; /* the working pointers have moved. */
+ int i, j;
+ position *pos;
+
+#ifdef DEBUG
+ fprintf(stderr, "dfaanalyze:\n");
+ for (i = 0; i < d->tindex; ++i)
+ {
+ fprintf(stderr, " %d:", i);
+ prtok(d->tokens[i]);
+ }
+ putc('\n', stderr);
+#endif
+
+ d->searchflag = searchflag;
+
+ MALLOC(nullable, int, d->depth);
+ o_nullable = nullable;
+ MALLOC(nfirstpos, int, d->depth);
+ o_nfirst = nfirstpos;
+ MALLOC(firstpos, position, d->nleaves);
+ o_firstpos = firstpos, firstpos += d->nleaves; /* Start one past the end:
+ leaves are pushed by predecrement, so the array fills downwards. */
+ MALLOC(nlastpos, int, d->depth);
+ o_nlast = nlastpos;
+ MALLOC(lastpos, position, d->nleaves);
+ o_lastpos = lastpos, lastpos += d->nleaves; /* Same scheme as firstpos. */
+ MALLOC(nalloc, int, d->tindex);
+ for (i = 0; i < d->tindex; ++i)
+ nalloc[i] = 0;
+ MALLOC(merged.elems, position, d->nleaves);
+
+ CALLOC(d->follows, position_set, d->tindex);
+
+ for (i = 0; i < d->tindex; ++i)
+#ifdef DEBUG
+ { /* Nonsyntactic #ifdef goo... */
+#endif
+ switch (d->tokens[i])
+ {
+ case EMPTY:
+ /* The empty set is nullable. */
+ *nullable++ = 1;
+
+ /* The firstpos and lastpos of the empty leaf are both empty. */
+ *nfirstpos++ = *nlastpos++ = 0;
+ break;
+
+ case STAR:
+ case PLUS:
+ /* Every element in the firstpos of the argument is in the follow
+ of every element in the lastpos. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos;
+ for (j = 0; j < nlastpos[-1]; ++j)
+ {
+ merge(&tmp, &d->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
+ nalloc[pos[j].index], merged.nelem - 1);
+ copy(&merged, &d->follows[pos[j].index]);
+ }
+ /* No break: STAR and PLUS continue into the QMARK case below. */
+
+ case QMARK:
+ /* A QMARK or STAR node is automatically nullable. */
+ if (d->tokens[i] != PLUS)
+ nullable[-1] = 1;
+ break;
+
+ case CAT:
+ /* Every element in the firstpos of the second argument is in the
+ follow of every element in the lastpos of the first argument. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos + nlastpos[-1]; /* Skip the second argument's lastpos. */
+ for (j = 0; j < nlastpos[-2]; ++j)
+ {
+ merge(&tmp, &d->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position,
+ nalloc[pos[j].index], merged.nelem - 1);
+ copy(&merged, &d->follows[pos[j].index]);
+ }
+
+ /* The firstpos of a CAT node is the firstpos of the first argument,
+ union that of the second argument if the first is nullable. */
+ if (nullable[-2])
+ nfirstpos[-2] += nfirstpos[-1];
+ else
+ firstpos += nfirstpos[-1]; /* Discard the second argument's firstpos. */
+ --nfirstpos;
+
+ /* The lastpos of a CAT node is the lastpos of the second argument,
+ union that of the first argument if the second is nullable. */
+ if (nullable[-1])
+ nlastpos[-2] += nlastpos[-1];
+ else
+ {
+ pos = lastpos + nlastpos[-2];
+ for (j = nlastpos[-1] - 1; j >= 0; --j) /* Slide the second argument's
+ lastpos up over the first argument's, highest element first. */
+ pos[j] = lastpos[j];
+ lastpos += nlastpos[-2];
+ nlastpos[-2] = nlastpos[-1];
+ }
+ --nlastpos;
+
+ /* A CAT node is nullable if both arguments are nullable. */
+ nullable[-2] = nullable[-1] && nullable[-2];
+ --nullable;
+ break;
+
+ case OR:
+ case ORTOP:
+ /* The firstpos is the union of the firstpos of each argument. */
+ nfirstpos[-2] += nfirstpos[-1];
+ --nfirstpos;
+
+ /* The lastpos is the union of the lastpos of each argument. */
+ nlastpos[-2] += nlastpos[-1];
+ --nlastpos;
+
+ /* An OR node is nullable if either argument is nullable. */
+ nullable[-2] = nullable[-1] || nullable[-2];
+ --nullable;
+ break;
+
+ default:
+ /* Anything else is a nonempty position. (Note that special
+ constructs like \< are treated as nonempty strings here;
+ an "epsilon closure" effectively makes them nullable later.
+ Backreferences have to get a real position so we can detect
+ transitions on them later. But they are nullable.) */
+ *nullable++ = d->tokens[i] == BACKREF;
+
+ /* This position is in its own firstpos and lastpos. */
+ *nfirstpos++ = *nlastpos++ = 1;
+ --firstpos, --lastpos;
+ firstpos->index = lastpos->index = i;
+ firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
+
+ /* Allocate the follow set for this position. */
+ nalloc[i] = 1;
+ MALLOC(d->follows[i].elems, position, nalloc[i]);
+ break;
+ }
+#ifdef DEBUG
+ /* ... balance the above nonsyntactic #ifdef goo... */
+ fprintf(stderr, "node %d:", i);
+ prtok(d->tokens[i]);
+ putc('\n', stderr);
+ fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
+ fprintf(stderr, " firstpos:");
+ for (j = nfirstpos[-1] - 1; j >= 0; --j)
+ {
+ fprintf(stderr, " %d:", firstpos[j].index);
+ prtok(d->tokens[firstpos[j].index]);
+ }
+ fprintf(stderr, "\n lastpos:");
+ for (j = nlastpos[-1] - 1; j >= 0; --j)
+ {
+ fprintf(stderr, " %d:", lastpos[j].index);
+ prtok(d->tokens[lastpos[j].index]);
+ }
+ putc('\n', stderr);
+ }
+#endif
+
+ /* For each follow set that is the follow set of a real position, replace
+ it with its epsilon closure. */
+ for (i = 0; i < d->tindex; ++i)
+ if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
+ || d->tokens[i] >= CSET)
+ {
+#ifdef DEBUG
+ fprintf(stderr, "follows(%d:", i);
+ prtok(d->tokens[i]);
+ fprintf(stderr, "):");
+ for (j = d->follows[i].nelem - 1; j >= 0; --j)
+ {
+ fprintf(stderr, " %d:", d->follows[i].elems[j].index);
+ prtok(d->tokens[d->follows[i].elems[j].index]);
+ }
+ putc('\n', stderr);
+#endif
+ copy(&d->follows[i], &merged);
+ epsclosure(&merged, d);
+ if (d->follows[i].nelem < merged.nelem) /* Grow only if the closure is
+ bigger than the set it replaces. */
+ REALLOC(d->follows[i].elems, position, merged.nelem);
+ copy(&merged, &d->follows[i]);
+ }
+
+ /* Get the epsilon closure of the firstpos of the regexp. The result will
+ be the set of positions of state 0. */
+ merged.nelem = 0;
+ for (i = 0; i < nfirstpos[-1]; ++i)
+ insert(firstpos[i], &merged);
+ epsclosure(&merged, d);
+
+ /* Check if any of the positions of state 0 will want newline context. */
+ wants_newline = 0;
+ for (i = 0; i < merged.nelem; ++i)
+ if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
+ wants_newline = 1;
+
+ /* Build the initial state. */
+ d->salloc = 1;
+ d->sindex = 0;
+ MALLOC(d->states, dfa_state, d->salloc);
+ state_index(d, &merged, wants_newline, 0); /* With d->sindex == 0 nothing
+ can match, so this creates state 0. */
+
+ free(o_nullable);
+ free(o_nfirst);
+ free(o_firstpos);
+ free(o_nlast);
+ free(o_lastpos);
+ free(nalloc);
+ free(merged.elems);
+}
+
+/* Find, for each character, the transition out of state s of d, and store
+ it in the appropriate slot of trans.
+
+ We divide the positions of s into groups (positions can appear in more
+ than one group). Each group is labeled with a set of characters that
+ every position in the group matches (taking into account, if necessary,
+ preceding context information of s). For each group, find the union
+ of its elements' follows. This set is the set of positions of the
+ new state. For each character in the group's label, set the transition
+ on this character to be to a state corresponding to the set's positions,
+ and its associated backward context information, if necessary.
+
+ If we are building a searching matcher, we include the positions of state
+ 0 in every state.
+
+ The collection of groups is constructed by building an equivalence-class
+ partition of the positions of s.
+
+ For each position, find the set of characters C that it matches. Eliminate
+ any characters from C that fail on grounds of backward context.
+
+ Search through the groups, looking for a group whose label L has nonempty
+ intersection with C. If L - C is nonempty, create a new group labeled
+ L - C and having the same positions as the current group, and set L to
+ the intersection of L and C. Insert the position in this group, set
+ C = C - L, and resume scanning.
+
+ If after comparing with every group there are characters remaining in C,
+ create a new group labeled with the characters of C and insert this
+ position in that group. */
+/* S is the index in D->states of the state to expand.  TRANS receives one
+   entry for each of the NOTCHAR character codes: the index of the
+   destination state, or -1 when there is no transition and D is not a
+   searching matcher.  New states are created in D as a side effect,
+   through state_index(). */
+void
+dfastate(s, d, trans)
+     int s;
+     struct dfa *d;
+     int trans[];
+{
+  position_set grps[NOTCHAR];	/* As many as will ever be needed. */
+  charclass labels[NOTCHAR];	/* Labels corresponding to the groups. */
+  int ngrps = 0;		/* Number of groups actually used. */
+  position pos;			/* Current position being considered. */
+  charclass matches;		/* Set of matching characters. */
+  int matchesf;			/* True if matches is nonempty. */
+  charclass intersect;		/* Intersection with some label set. */
+  int intersectf;		/* True if intersect is nonempty. */
+  charclass leftovers;		/* Stuff in the label that didn't match. */
+  int leftoversf;		/* True if leftovers is nonempty. */
+  static charclass letters;	/* Set of characters considered letters. */
+  static charclass newline;	/* Set containing only the newline. */
+  position_set follows;		/* Union of the follows of some group. */
+  int state;			/* New state. */
+  int wants_newline;		/* New state wants to know newline context. */
+  int state_newline;		/* New state on a newline transition. */
+  int wants_letter;		/* New state wants to know letter context. */
+  int state_letter;		/* New state on a letter transition. */
+  static int initialized;	/* Flag for static initialization. */
+  int i, j, k;
+
+  /* Initialize the set of letters, if necessary. */
+  if (! initialized)
+    {
+      initialized = 1;
+      for (i = 0; i < NOTCHAR; ++i)
+        if (ISALNUM(i))
+          setbit(i, letters);
+      setbit('\n', newline);
+    }
+
+  zeroset(matches);
+
+  for (i = 0; i < d->states[s].elems.nelem; ++i)
+    {
+      pos = d->states[s].elems.elems[i];
+      if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
+        setbit(d->tokens[pos.index], matches);
+      else if (d->tokens[pos.index] >= CSET)
+        copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
+      else
+        continue;
+
+      /* Some characters may need to be eliminated from matches because
+         they fail in the current context.  0xFF is presumably the value
+         of NO_CONSTRAINT, in which case nothing can be eliminated. */
+      if (pos.constraint != 0xFF)
+        {
+          if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
+                                        d->states[s].newline, 1))
+            clrbit('\n', matches);
+          if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
+                                        d->states[s].newline, 0))
+            for (j = 0; j < CHARCLASS_INTS; ++j)
+              matches[j] &= newline[j];
+          if (! MATCHES_LETTER_CONTEXT(pos.constraint,
+                                       d->states[s].letter, 1))
+            for (j = 0; j < CHARCLASS_INTS; ++j)
+              matches[j] &= ~letters[j];
+          if (! MATCHES_LETTER_CONTEXT(pos.constraint,
+                                       d->states[s].letter, 0))
+            for (j = 0; j < CHARCLASS_INTS; ++j)
+              matches[j] &= letters[j];
+
+          /* If there are no characters left, there's no point in going on. */
+          for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
+            continue;
+          if (j == CHARCLASS_INTS)
+            continue;
+        }
+
+      for (j = 0; j < ngrps; ++j)
+        {
+          /* If matches contains a single character only, and the current
+             group's label doesn't contain that character, go on to the
+             next group. */
+          if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
+              && !tstbit(d->tokens[pos.index], labels[j]))
+            continue;
+
+          /* Check if this group's label has a nonempty intersection with
+             matches. */
+          intersectf = 0;
+          for (k = 0; k < CHARCLASS_INTS; ++k)
+            if ((intersect[k] = matches[k] & labels[j][k]) != 0)
+              intersectf = 1;
+          if (! intersectf)
+            continue;
+
+          /* It does; now find the set differences both ways. */
+          leftoversf = matchesf = 0;
+          for (k = 0; k < CHARCLASS_INTS; ++k)
+            {
+              /* Even an optimizing compiler can't know this for sure. */
+              int match = matches[k], label = labels[j][k];
+
+              if ((leftovers[k] = ~match & label) != 0)
+                leftoversf = 1;
+              if ((matches[k] = match & ~label) != 0)
+                matchesf = 1;
+            }
+
+          /* If there were leftovers, create a new group labeled with them. */
+          if (leftoversf)
+            {
+              copyset(leftovers, labels[ngrps]);
+              copyset(intersect, labels[j]);
+              MALLOC(grps[ngrps].elems, position, d->nleaves);
+              copy(&grps[j], &grps[ngrps]);
+              ++ngrps;
+            }
+
+          /* Put the position in the current group.  Note that there is no
+             reason to call insert() here. */
+          grps[j].elems[grps[j].nelem++] = pos;
+
+          /* If every character matching the current position has been
+             accounted for, we're done. */
+          if (! matchesf)
+            break;
+        }
+
+      /* If we've passed the last group, and there are still characters
+         unaccounted for, then we'll have to create a new group. */
+      if (j == ngrps)
+        {
+          copyset(matches, labels[ngrps]);
+          zeroset(matches);
+          MALLOC(grps[ngrps].elems, position, d->nleaves);
+          grps[ngrps].nelem = 1;
+          grps[ngrps].elems[0] = pos;
+          ++ngrps;
+        }
+    }
+
+  /* Scratch set that holds the positions of each destination state.  (The
+     second scratch set this function used to allocate was never used.) */
+  MALLOC(follows.elems, position, d->nleaves);
+
+  /* If we are a searching matcher, the default transition is to a state
+     containing the positions of state 0, otherwise the default transition
+     is to fail miserably. */
+  if (d->searchflag)
+    {
+      wants_newline = 0;
+      wants_letter = 0;
+      for (i = 0; i < d->states[0].elems.nelem; ++i)
+        {
+          if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint))
+            wants_newline = 1;
+          if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint))
+            wants_letter = 1;
+        }
+      copy(&d->states[0].elems, &follows);
+      state = state_index(d, &follows, 0, 0);
+      if (wants_newline)
+        state_newline = state_index(d, &follows, 1, 0);
+      else
+        state_newline = state;
+      if (wants_letter)
+        state_letter = state_index(d, &follows, 0, 1);
+      else
+        state_letter = state;
+      for (i = 0; i < NOTCHAR; ++i)
+        if (i == '\n')
+          trans[i] = state_newline;
+        else if (ISALNUM(i))
+          trans[i] = state_letter;
+        else
+          trans[i] = state;
+    }
+  else
+    for (i = 0; i < NOTCHAR; ++i)
+      trans[i] = -1;
+
+  for (i = 0; i < ngrps; ++i)
+    {
+      follows.nelem = 0;
+
+      /* Find the union of the follows of the positions of the group.
+         This is a hideously inefficient loop.  Fix it someday. */
+      for (j = 0; j < grps[i].nelem; ++j)
+        for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k)
+          insert(d->follows[grps[i].elems[j].index].elems[k], &follows);
+
+      /* If we are building a searching matcher, throw in the positions
+         of state 0 as well. */
+      if (d->searchflag)
+        for (j = 0; j < d->states[0].elems.nelem; ++j)
+          insert(d->states[0].elems.elems[j], &follows);
+
+      /* Find out if the new state will want any context information. */
+      wants_newline = 0;
+      if (tstbit('\n', labels[i]))
+        for (j = 0; j < follows.nelem; ++j)
+          if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
+            wants_newline = 1;
+
+      wants_letter = 0;
+      for (j = 0; j < CHARCLASS_INTS; ++j)
+        if (labels[i][j] & letters[j])
+          break;
+      if (j < CHARCLASS_INTS)
+        for (j = 0; j < follows.nelem; ++j)
+          if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
+            wants_letter = 1;
+
+      /* Find the state(s) corresponding to the union of the follows. */
+      state = state_index(d, &follows, 0, 0);
+      if (wants_newline)
+        state_newline = state_index(d, &follows, 1, 0);
+      else
+        state_newline = state;
+      if (wants_letter)
+        state_letter = state_index(d, &follows, 0, 1);
+      else
+        state_letter = state;
+
+      /* Set the transitions for each character in the current label.  The
+         mask is built from an unsigned 1 so that testing the top bit of a
+         word does not shift into the sign bit of an int. */
+      for (j = 0; j < CHARCLASS_INTS; ++j)
+        for (k = 0; k < INTBITS; ++k)
+          if (labels[i][j] & 1U << k)
+            {
+              int c = j * INTBITS + k;
+
+              if (c == '\n')
+                trans[c] = state_newline;
+              else if (ISALNUM(c))
+                trans[c] = state_letter;
+              else if (c < NOTCHAR)
+                trans[c] = state;
+            }
+    }
+
+  for (i = 0; i < ngrps; ++i)
+    free(grps[i].elems);
+  free(follows.elems);
+}
+
+/* Some routines for manipulating a compiled dfa's transition tables.
+ Each state may or may not have a transition table; if it does, and it
+ is a non-accepting state, then d->trans[state] points to its table.
+ If it is an accepting state then d->fails[state] points to its table.
+ If it has no table at all, then d->trans[state] is NULL.
+ TODO: Improve this comment, get rid of the unnecessary redundancy. */
+
+static void
+build_state(s, d)
+ int s; /* Index of the state whose transition table is to be built. */
+ struct dfa *d; /* The dfa that owns the state and its tables. */
+{
+ int *trans; /* The new transition table. */
+ int i;
+
+ /* Set an upper limit on the number of transition tables that will ever
+ exist at once. 1024 is arbitrary. The idea is that the frequently
+ used transition tables will be quickly rebuilt, whereas the ones that
+ were only needed once or twice will be cleared away. */
+ if (d->trcount >= 1024)
+ {
+ for (i = 0; i < d->tralloc; ++i)
+ if (d->trans[i])
+ {
+ free((ptr_t) d->trans[i]);
+ d->trans[i] = NULL;
+ }
+ else if (d->fails[i])
+ {
+ free((ptr_t) d->fails[i]);
+ d->fails[i] = NULL;
+ }
+ d->trcount = 0;
+ }
+
+ ++d->trcount;
+
+ /* Set up the success bits for this state. */
+ d->success[s] = 0; /* The bit values 4, 2 and 1 are the ones dfaexec()
+ keeps in its sbit table for newline, alphanumeric and other
+ characters respectively. */
+ if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0,
+ s, *d))
+ d->success[s] |= 4;
+ if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1,
+ s, *d))
+ d->success[s] |= 2;
+ if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0,
+ s, *d))
+ d->success[s] |= 1;
+
+ MALLOC(trans, int, NOTCHAR);
+ dfastate(s, d, trans);
+
+ /* Now go through the new transition table, and make sure that the trans
+ and fail arrays are allocated large enough to hold a pointer for the
+ largest state mentioned in the table. */
+ for (i = 0; i < NOTCHAR; ++i)
+ if (trans[i] >= d->tralloc)
+ {
+ int oldalloc = d->tralloc;
+
+ while (trans[i] >= d->tralloc)
+ d->tralloc *= 2;
+ REALLOC(d->realtrans, int *, d->tralloc + 1); /* One extra slot... */
+ d->trans = d->realtrans + 1; /* ...so that d->trans[-1] is addressable. */
+ REALLOC(d->fails, int *, d->tralloc);
+ REALLOC(d->success, int, d->tralloc);
+ REALLOC(d->newlines, int, d->tralloc);
+ while (oldalloc < d->tralloc) /* Mark the added slots as having no table. */
+ {
+ d->trans[oldalloc] = NULL;
+ d->fails[oldalloc++] = NULL;
+ }
+ }
+
+ /* Keep the newline transition in a special place so we can use it as
+ a sentinel. */
+ d->newlines[s] = trans['\n'];
+ trans['\n'] = -1;
+
+ if (ACCEPTING(s, *d)) /* The table is filed under fails or trans, never both. */
+ d->fails[s] = trans;
+ else
+ d->trans[s] = trans;
+}
+
+/* Allocate the per-state tables of D with room for a single state and
+   build the transition table of state 0.  realtrans gets one slot more
+   than the others, and trans is set to its second slot, so that index -1
+   of trans is addressable. */
+static void
+build_state_zero(d)
+     struct dfa *d;
+{
+  d->trcount = 0;
+  d->tralloc = 1;
+
+  CALLOC(d->realtrans, int *, d->tralloc + 1);
+  d->trans = &d->realtrans[1];
+
+  CALLOC(d->fails, int *, d->tralloc);
+  MALLOC(d->success, int, d->tralloc);
+  MALLOC(d->newlines, int, d->tralloc);
+
+  build_state(0, d);
+}
+
+/* Search through a buffer looking for a match to the given struct dfa.
+ Find the first occurrence of a string matching the regexp in the buffer,
+ and the shortest possible version thereof. Return a pointer to the first
+ character after the match, or NULL if none is found. Begin points to
+ the beginning of the buffer, and end points to the first character after
+ its end. We store a newline in *end to act as a sentinel, so end had
+ better point somewhere valid. Newline is a flag indicating whether to
+ allow newlines to be in the matching string. If count is non-
+ NULL it points to a place we're supposed to increment every time we
+ see a newline. Finally, if backref is non-NULL it points to a place
+ where we're supposed to store a 1 if backreferencing happened and the
+ match needs to be verified by a backtracking matcher. Otherwise
+ we store a 0 in *backref. */
+char *
+dfaexec(d, begin, end, newline, count, backref)
+ struct dfa *d; /* Compiled dfa; missing transition tables are built here. */
+ char *begin; /* First character of the buffer to search. */
+ char *end; /* One past the last character; *end is overwritten with '\n'. */
+ int newline; /* Nonzero to let a match continue past a newline. */
+ int *count; /* If non-NULL, incremented for each newline seen. */
+ int *backref; /* If non-NULL, set to 1 or 0 whenever a match is returned. */
+{
+ register int s, s1, tmp; /* Current state. */
+ register unsigned char *p; /* Current input character. */
+ register int **trans, *t; /* Copy of d->trans so it can be optimized
+ into a register. */
+ static int sbit[NOTCHAR]; /* Table for anding with d->success. */
+ static int sbit_init;
+
+ if (! sbit_init)
+ {
+ int i;
+
+ sbit_init = 1;
+ for (i = 0; i < NOTCHAR; ++i)
+ if (i == '\n')
+ sbit[i] = 4;
+ else if (ISALNUM(i))
+ sbit[i] = 2;
+ else
+ sbit[i] = 1;
+ }
+
+ if (! d->tralloc) /* dfainit() leaves tralloc at 0 until tables exist. */
+ build_state_zero(d);
+
+ s = s1 = 0;
+ p = (unsigned char *) begin;
+ trans = d->trans;
+ *end = '\n'; /* Sentinel; build_state() maps '\n' to -1 in every table. */
+
+ for (;;)
+ {
+ /* The dreaded inner loop. */
+ if ((t = trans[s]) != 0)
+ do
+ { /* Two transitions per iteration; s and s1 alternate as newest state. */
+ s1 = t[*p++];
+ if (! (t = trans[s1]))
+ goto last_was_s;
+ s = t[*p++];
+ }
+ while ((t = trans[s]) != 0);
+ goto last_was_s1;
+ last_was_s:
+ tmp = s, s = s1, s1 = tmp; /* Make s the newest state, s1 the one before. */
+ last_was_s1:
+
+ if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
+ {
+ if (d->success[s] & sbit[*p]) /* Accepts given the next character. */
+ {
+ if (backref)
+ if (d->states[s].backref)
+ *backref = 1;
+ else
+ *backref = 0;
+ return (char *) p;
+ }
+
+ s1 = s;
+ s = d->fails[s][*p++];
+ continue;
+ }
+
+ /* If the previous character was a newline, count it. */
+ if (count && (char *) p <= end && p[-1] == '\n')
+ ++*count;
+
+ /* Check if we've run off the end of the buffer. */
+ if ((char *) p > end)
+ return NULL;
+
+ if (s >= 0) /* A real state that has no transition table yet. */
+ {
+ build_state(s, d);
+ trans = d->trans; /* build_state() may have moved the table array. */
+ continue;
+ }
+
+ if (p[-1] == '\n' && newline)
+ {
+ s = d->newlines[s1]; /* The real newline transition, saved by build_state(). */
+ continue;
+ }
+
+ s = 0; /* No transition: start again from state 0. */
+ }
+}
+
+/* Put a dfa into its initial, empty state: give it one-element
+   charclass and token arrays and clear the fields that the other
+   routines do not set up for themselves. */
+void
+dfainit(d)
+     struct dfa *d;
+{
+  /* Scanner storage: room for a single character class to begin with. */
+  d->calloc = 1;
+  MALLOC(d->charclasses, charclass, d->calloc);
+  d->cindex = 0;
+
+  /* Parser storage: room for a single token. */
+  d->talloc = 1;
+  MALLOC(d->tokens, token, d->talloc);
+
+  /* Nothing has been parsed yet. */
+  d->nregexps = 0;
+  d->nleaves = 0;
+  d->depth = 0;
+  d->tindex = 0;
+
+  /* No transition tables and no "must" strings so far. */
+  d->musts = 0;
+  d->tralloc = 0;
+  d->searchflag = 0;
+}
+
+/* Parse and analyze a single string of the given length.
+   S/LEN is the regexp text, D the dfa to fill in, and SEARCHFLAG is
+   handed on to dfaanalyze().  When case folding is on, a lower-cased
+   copy of the pattern is parsed first, with folding switched off, so
+   that dfamust() sees plain characters; the token counters are then
+   reset and the real pattern is parsed with folding on again. */
+void
+dfacomp(s, len, d, searchflag)
+     char *s;
+     size_t len;
+     struct dfa *d;
+     int searchflag;
+{
+  if (case_fold)	/* dummy folding in service of dfamust() */
+    {
+      char *lcopy;
+      size_t i;	/* same type as len, so the comparison is exact */
+
+      /* Ask for at least one byte: malloc(0) is allowed to return NULL,
+         and an empty pattern must not be reported as "out of memory". */
+      lcopy = malloc(len ? len : 1);
+      if (!lcopy)
+        dfaerror("out of memory");
+
+      /* This is a kludge. */
+      case_fold = 0;
+      /* The ctype routines want unsigned char values, not plain char. */
+      for (i = 0; i < len; ++i)
+        if (ISUPPER((unsigned char) s[i]))
+          lcopy[i] = tolower((unsigned char) s[i]);
+        else
+          lcopy[i] = s[i];
+
+      dfainit(d);
+      dfaparse(lcopy, len, d);
+      free(lcopy);
+      dfamust(d);
+      /* Throw the folded parse away, keeping only the must strings. */
+      d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
+      case_fold = 1;
+      dfaparse(s, len, d);
+      dfaanalyze(d, searchflag);
+    }
+  else
+    {
+      dfainit(d);
+      dfaparse(s, len, d);
+      dfamust(d);
+      dfaanalyze(d, searchflag);
+    }
+}
+
+/* Release everything a dfa points to: the scanner and parser arrays,
+   the states and follow sets, the transition tables and the list of
+   "must" strings.  The struct itself is left alone. */
+void
+dfafree(d)
+     struct dfa *d;
+{
+  int n;
+  struct dfamust *cur, *following;
+
+  free((ptr_t) d->charclasses);
+  free((ptr_t) d->tokens);
+
+  /* The position set of each state, then the state array itself. */
+  for (n = 0; n < d->sindex; ++n)
+    free((ptr_t) d->states[n].elems.elems);
+  free((ptr_t) d->states);
+
+  /* Follow sets; empty ones have nothing allocated. */
+  for (n = 0; n < d->tindex; ++n)
+    {
+      if (d->follows[n].elems)
+        free((ptr_t) d->follows[n].elems);
+    }
+  free((ptr_t) d->follows);
+
+  /* For each slot free its trans table or, failing that, its fails
+     table. */
+  for (n = 0; n < d->tralloc; ++n)
+    {
+      if (d->trans[n])
+        free((ptr_t) d->trans[n]);
+      else if (d->fails[n])
+        free((ptr_t) d->fails[n]);
+    }
+
+  /* d->trans is not freed directly; the array is released through
+     d->realtrans. */
+  if (d->realtrans)
+    free((ptr_t) d->realtrans);
+  if (d->fails)
+    free((ptr_t) d->fails);
+  if (d->newlines)
+    free((ptr_t) d->newlines);
+  if (d->success)
+    free((ptr_t) d->success);
+
+  /* Walk the must list, remembering each successor before its
+     element goes away. */
+  cur = d->musts;
+  while (cur)
+    {
+      following = cur->next;
+      free(cur->must);
+      free((ptr_t) cur);
+      cur = following;
+    }
+}
+
+/* Having found the postfix representation of the regular expression,
+ try to find a long sequence of characters that must appear in any line
+ containing the r.e.
+ Finding a "longest" sequence is beyond the scope here;
+ we take an easy way out and hope for the best.
+ (Take "(ab|a)b"--please.)
+
+ We do a bottom-up calculation of sequences of characters that must appear
+ in matches of r.e.'s represented by trees rooted at the nodes of the postfix
+ representation:
+ sequences that must appear at the left of the match ("left")
+ sequences that must appear at the right of the match ("right")
+ lists of sequences that must appear somewhere in the match ("in")
+ sequences that must constitute the match ("is")
+
+ When we get to the root of the tree, we use one of the longest of its
+ calculated "in" sequences as our answer. A copy of the sequence we find is
+ pushed onto the front of the d->musts list (where "d" is the single argument
+ passed to "dfamust"), with "exact" set if it equals the root's "is" sequence.
+
+ The sequences calculated for the various types of node (in pseudo ANSI c)
+ are shown below. "p" is the operand of unary operators (and the left-hand
+ operand of binary operators); "q" is the right-hand operand of binary
+ operators.
+
+ "ZERO" means "a zero-length sequence" below.
+
+ Type left right is in
+ ---- ---- ----- -- --
+ char c # c # c # c # c
+
+ CSET ZERO ZERO ZERO ZERO
+
+ STAR ZERO ZERO ZERO ZERO
+
+ QMARK ZERO ZERO ZERO ZERO
+
+ PLUS p->left p->right ZERO p->in
+
+ CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus
+ p->left : q->right : q->is!=ZERO) ? q->in plus
+ p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
+ ZERO
+
+ OR longest common longest common (do p->is and substrings common to
+ leading trailing q->is have same p->in and q->in
+ (sub)sequence (sub)sequence length and
+ of p->left of p->right content) ?
+ and q->left and q->right p->is : NULL
+
+ If there's anything else we recognize in the tree, all four sequences get set
+ to zero-length sequences. If there's something we don't recognize in the tree,
+ we just return a zero-length sequence.
+
+ Break ties in favor of infrequent letters (choosing 'zzz' in preference to
+ 'aaa')?
+
+ And. . .is it here or someplace that we might ponder "optimizations" such as
+ egrep 'psi|epsilon' -> egrep 'psi'
+ egrep 'pepsi|epsilon' -> egrep 'epsi'
+ (Yes, we now find "epsi" as a "string
+ that must occur", but we might also
+ simplify the *entire* r.e. being sought)
+ grep '[c]' -> grep 'c'
+ grep '(ab|a)b' -> grep 'ab'
+ grep 'ab*' -> grep 'a'
+ grep 'a*b' -> grep 'b'
+
+ There are several issues:
+
+ Is optimization easy (enough)?
+
+ Does optimization actually accomplish anything,
+ or is the automaton you get from "psi|epsilon" (for example)
+ the same as the one you get from "psi" (for example)?
+
+ Are optimizable r.e.'s likely to be used in real-life situations
+ (something like 'ab*' is probably unlikely; something like
+ 'psi|epsilon' is likelier)? */
+
+/* Append NEW to the malloc'd string OLD and return the result, which
+   may have moved.  A NULL OLD means "allocate a fresh copy of NEW"
+   (an empty string if NEW is NULL too); a NULL or empty NEW with a
+   non-NULL OLD returns OLD untouched.  Returns NULL when memory cannot
+   be obtained; OLD has been freed in that case, so a caller may safely
+   overwrite its only pointer to OLD with the result. */
+static char *
+icatalloc(old, new)
+     char *old;
+     char *new;
+{
+  char *result;
+  size_t oldsize, newsize;
+
+  newsize = (new == NULL) ? 0 : strlen(new);
+  if (old == NULL)
+    {
+      oldsize = 0;
+      result = (char *) malloc(newsize + 1);
+    }
+  else
+    {
+      if (newsize == 0)
+        return old;
+      oldsize = strlen(old);
+      result = (char *) realloc((void *) old, oldsize + newsize + 1);
+      /* A failed realloc leaves OLD allocated; release it here because
+         every caller stores our return value over its pointer. */
+      if (result == NULL)
+        free(old);
+    }
+  if (result == NULL)
+    return NULL;
+  if (new != NULL)
+    (void) strcpy(result + oldsize, new);
+  else
+    result[oldsize] = '\0';	/* never hand back an unterminated buffer */
+  return result;
+}
+
+/* Return a freshly allocated copy of STRING, made by "appending" it
+   to no string at all; NULL if icatalloc() cannot get memory. */
+static char *
+icpyalloc(string)
+     char *string;
+{
+  char *copy;
+
+  copy = icatalloc((char *) NULL, string);
+  return copy;
+}
+
+/* Find the first place in LOOKIN at which LOOKFOR occurs and return a
+   pointer to it, or NULL if there is none.  Only positions holding a
+   character are tried, so nothing is ever found in an empty LOOKIN. */
+static char *
+istrstr(lookin, lookfor)
+     char *lookin;
+     char *lookfor;
+{
+  size_t want;
+  char *here;
+
+  want = strlen(lookfor);
+  here = lookin;
+  while (*here != '\0')
+    {
+      if (strncmp(here, lookfor, want) == 0)
+        return here;
+      ++here;
+    }
+  return NULL;
+}
+
+/* free() that quietly ignores a NULL pointer. */
+static void
+ifree(cp)
+     char *cp;
+{
+  if (cp == NULL)
+    return;
+  free(cp);
+}
+
+/* Free every string in the NULL-terminated list CPP and clear its
+   slot.  The array holding the pointers is not freed.  A NULL CPP is
+   accepted and ignored. */
+static void
+freelist(cpp)
+     char **cpp;
+{
+  char **slot;
+
+  if (cpp == NULL)
+    return;
+  slot = cpp;
+  while (*slot != NULL)
+    {
+      free(*slot);
+      *slot = NULL;
+      ++slot;
+    }
+}
+
+/* Add a copy of the first LEN characters of NEW to the NULL-terminated
+   string list CPP and return the (possibly moved) list.  The list is
+   kept free of redundancy: nothing is added if NEW already occurs
+   inside a member, and members that occur inside NEW are dropped.
+   Returns NULL if CPP is NULL or if memory runs out; in the latter
+   case the whole list, array included, has been freed, since every
+   caller overwrites its pointer to the list with the result. */
+static char **
+enlist(cpp, new, len)
+     char **cpp;
+     char *new;
+     size_t len;
+{
+  int i, j;
+  char **grown;
+
+  if (cpp == NULL)
+    return NULL;
+  if ((new = icpyalloc(new)) == NULL)
+    {
+      freelist(cpp);
+      free((char *) cpp);
+      return NULL;
+    }
+  new[len] = '\0';
+  /* Is there already something in the list that's new (or longer)? */
+  for (i = 0; cpp[i] != NULL; ++i)
+    if (istrstr(cpp[i], new) != NULL)
+      {
+        free(new);
+        return cpp;
+      }
+  /* Eliminate any obsoleted strings.  I is the number of live entries;
+     a removed entry is replaced by the last live one, so the list stays
+     packed and NULL-terminated at every step. */
+  j = 0;
+  while (cpp[j] != NULL)
+    if (istrstr(new, cpp[j]) == NULL)
+      ++j;
+    else
+      {
+        free(cpp[j]);
+        --i;
+        if (i != j)
+          cpp[j] = cpp[i];
+        cpp[i] = NULL;
+      }
+  /* Add the new string. */
+  grown = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);
+  if (grown == NULL)
+    {
+      /* realloc left the old array alone; don't leak it or the copy. */
+      freelist(cpp);
+      free((char *) cpp);
+      free(new);
+      return NULL;
+    }
+  grown[i] = new;
+  grown[i + 1] = NULL;
+  return grown;
+}
+
+/* Build and return an allocated, NULL-terminated list of the distinct
+   substrings that LEFT and RIGHT have in common.  For each position in
+   LEFT the longest run that also occurs somewhere in RIGHT is handed
+   to enlist().  Returns NULL if either argument is NULL or if memory
+   runs out. */
+static char **
+comsubs(left, right)
+     char *left;
+     char *right;
+{
+  char **list;
+  char *lp;
+  char *rp;
+  size_t run, longest;
+
+  if (left == NULL || right == NULL)
+    return NULL;
+  list = (char **) malloc(sizeof *list);
+  if (list == NULL)
+    return NULL;
+  list[0] = NULL;
+  for (lp = left; *lp != '\0'; ++lp)
+    {
+      longest = 0;
+      /* Try every occurrence of *lp in right. */
+      for (rp = index(right, *lp); rp != NULL; rp = index(rp + 1, *lp))
+        {
+          run = 1;
+          while (lp[run] != '\0' && lp[run] == rp[run])
+            ++run;
+          if (run > longest)
+            longest = run;
+        }
+      if (longest != 0)
+        {
+          list = enlist(list, lp, longest);
+          if (list == NULL)
+            break;
+        }
+    }
+  return list;
+}
+
+/* Merge every string of the NULL-terminated list NEW into the list
+   OLD by way of enlist() and return the resulting list.  Returns NULL
+   if either list is NULL or as soon as enlist() fails. */
+static char **
+addlists(old, new)
+     char **old;
+     char **new;
+{
+  char **item;
+
+  if (old == NULL || new == NULL)
+    return NULL;
+  for (item = new; old != NULL && *item != NULL; ++item)
+    old = enlist(old, *item, strlen(*item));
+  return old;
+}
+
+/* Given two lists of substrings, return a new list giving substrings
+   common to both: the common substrings of every pairing of a string
+   from LEFT with a string from RIGHT, merged with addlists().  The
+   result is allocated and NULL-terminated.  Returns NULL if either
+   list is NULL or if anything goes wrong along the way. */
+static char **
+inboth(left, right)
+     char **left;
+     char **right;
+{
+  char **both;
+  char **temp;
+  int lnum, rnum;
+
+  if (left == NULL || right == NULL)
+    return NULL;
+  both = (char **) malloc(sizeof *both);
+  if (both == NULL)
+    return NULL;
+  both[0] = NULL;
+  for (lnum = 0; left[lnum] != NULL; ++lnum)
+    {
+      for (rnum = 0; right[rnum] != NULL; ++rnum)
+        {
+          temp = comsubs(left[lnum], right[rnum]);
+          if (temp == NULL)
+            {
+              /* Give back the array as well as its strings. */
+              freelist(both);
+              free((char *) both);
+              return NULL;
+            }
+          both = addlists(both, temp);
+          freelist(temp);
+          free((char *) temp);
+          if (both == NULL)
+            return NULL;
+        }
+    }
+  return both;
+}
+
+typedef struct /* Per-node working data for dfamust(). */
+{
+ char **in; /* NULL-terminated list of sequences that must appear
+ somewhere in the match. */
+ char *left; /* Sequence that must appear at the left of the match. */
+ char *right; /* Sequence that must appear at the right of the match. */
+ char *is; /* Sequence that must constitute the match. */
+} must;
+
+/* Return MP to the "zero-length sequence" state: empty is, left and
+   right strings and an "in" list with no members.  The buffers
+   themselves stay allocated. */
+static void
+resetmust(mp)
+     must *mp;
+{
+  freelist(mp->in);
+  mp->is[0] = '\0';
+  mp->right[0] = '\0';
+  mp->left[0] = '\0';
+}
+
+/* Work out a string that must appear in anything matched by the regexp
+   whose postfix token array is in DFA, and push a copy of it onto the
+   front of dfa->musts.  The token array is walked once with a stack of
+   "must" records (mp is the stack pointer): leaves push a record,
+   unary operators rewrite the top one, and binary operators fold the
+   top two into one.  At END the longest "in" string of the remaining
+   record is the answer, and it is marked exact when it equals that
+   record's "is" string.  Nothing is recorded when no non-empty string
+   is found, when the token array holds something unexpected, or when
+   memory runs out. */
+static void
+dfamust(dfa)
+     struct dfa *dfa;
+{
+  must *musts;
+  must *mp;
+  char *result;
+  int ri;
+  int i;
+  int exact;
+  token t;
+  static must must0;
+  struct dfamust *dm;
+  static char empty_string[] = "";
+
+  result = empty_string;
+  exact = 0;
+  musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
+  if (musts == NULL)
+    return;
+  mp = musts;
+  /* Clear every record first, so that the cleanup at "done" is safe
+     even if the allocations below stop part way through. */
+  for (i = 0; i <= dfa->tindex; ++i)
+    mp[i] = must0;
+  for (i = 0; i <= dfa->tindex; ++i)
+    {
+      mp[i].in = (char **) malloc(sizeof *mp[i].in);
+      mp[i].left = malloc(2);
+      mp[i].right = malloc(2);
+      mp[i].is = malloc(2);
+      if (mp[i].in == NULL || mp[i].left == NULL ||
+          mp[i].right == NULL || mp[i].is == NULL)
+        goto done;
+      mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
+      mp[i].in[0] = NULL;
+    }
+#ifdef DEBUG
+  fprintf(stderr, "dfamust:\n");
+  for (i = 0; i < dfa->tindex; ++i)
+    {
+      fprintf(stderr, " %d:", i);
+      prtok(dfa->tokens[i]);
+    }
+  putc('\n', stderr);
+#endif
+  for (ri = 0; ri < dfa->tindex; ++ri)
+    {
+      switch (t = dfa->tokens[ri])
+        {
+        case LPAREN:
+        case RPAREN:
+          goto done;		/* "cannot happen" */
+        case EMPTY:
+        case BEGLINE:
+        case ENDLINE:
+        case BEGWORD:
+        case ENDWORD:
+        case LIMWORD:
+        case NOTLIMWORD:
+        case BACKREF:
+          resetmust(mp);
+          break;
+        case STAR:
+        case QMARK:
+          if (mp <= musts)
+            goto done;		/* "cannot happen" */
+          --mp;
+          resetmust(mp);
+          break;
+        case OR:
+        case ORTOP:
+          if (mp < &musts[2])
+            goto done;		/* "cannot happen" */
+          {
+            char **new;
+            must *lmp;
+            must *rmp;
+            int j, ln, rn, n;
+
+            rmp = --mp;
+            lmp = --mp;
+            /* Guaranteed to be.  Unlikely, but. . . */
+            if (strcmp(lmp->is, rmp->is) != 0)
+              lmp->is[0] = '\0';
+            /* Left side--easy */
+            i = 0;
+            while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
+              ++i;
+            lmp->left[i] = '\0';
+            /* Right side: keep the common trailing part, moved to the
+               front of lmp->right. */
+            ln = strlen(lmp->right);
+            rn = strlen(rmp->right);
+            n = ln;
+            if (n > rn)
+              n = rn;
+            for (i = 0; i < n; ++i)
+              if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
+                break;
+            for (j = 0; j < i; ++j)
+              lmp->right[j] = lmp->right[(ln - i) + j];
+            lmp->right[j] = '\0';
+            new = inboth(lmp->in, rmp->in);
+            if (new == NULL)
+              goto done;
+            freelist(lmp->in);
+            free((char *) lmp->in);
+            lmp->in = new;
+          }
+          break;
+        case PLUS:
+          if (mp <= musts)
+            goto done;		/* "cannot happen" */
+          --mp;
+          mp->is[0] = '\0';
+          break;
+        case END:
+          if (mp != &musts[1])
+            goto done;		/* "cannot happen" */
+          for (i = 0; musts[0].in[i] != NULL; ++i)
+            if (strlen(musts[0].in[i]) > strlen(result))
+              result = musts[0].in[i];
+          if (strcmp(result, musts[0].is) == 0)
+            exact = 1;
+          goto done;
+        case CAT:
+          if (mp < &musts[2])
+            goto done;		/* "cannot happen" */
+          {
+            must *lmp;
+            must *rmp;
+
+            rmp = --mp;
+            lmp = --mp;
+            /* In.  Everything in left, plus everything in
+               right, plus catenation of
+               left's right and right's left. */
+            lmp->in = addlists(lmp->in, rmp->in);
+            if (lmp->in == NULL)
+              goto done;
+            if (lmp->right[0] != '\0' &&
+                rmp->left[0] != '\0')
+              {
+                char *tp;
+
+                tp = icpyalloc(lmp->right);
+                if (tp == NULL)
+                  goto done;
+                tp = icatalloc(tp, rmp->left);
+                if (tp == NULL)
+                  goto done;
+                lmp->in = enlist(lmp->in, tp,
+                                 strlen(tp));
+                free(tp);
+                if (lmp->in == NULL)
+                  goto done;
+              }
+            /* Left-hand */
+            if (lmp->is[0] != '\0')
+              {
+                lmp->left = icatalloc(lmp->left,
+                                      rmp->left);
+                if (lmp->left == NULL)
+                  goto done;
+              }
+            /* Right-hand */
+            if (rmp->is[0] == '\0')
+              lmp->right[0] = '\0';
+            lmp->right = icatalloc(lmp->right, rmp->right);
+            if (lmp->right == NULL)
+              goto done;
+            /* Guaranteed to be */
+            if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
+              {
+                lmp->is = icatalloc(lmp->is, rmp->is);
+                if (lmp->is == NULL)
+                  goto done;
+              }
+            else
+              lmp->is[0] = '\0';
+          }
+          break;
+        default:
+          if (t < END)
+            {
+              /* "cannot happen" */
+              goto done;
+            }
+          else if (t == '\0')
+            {
+              /* not on *my* shift */
+              goto done;
+            }
+          else if (t >= CSET)
+            {
+              /* easy enough */
+              resetmust(mp);
+            }
+          else
+            {
+              /* plain character */
+              resetmust(mp);
+              mp->is[0] = mp->left[0] = mp->right[0] = t;
+              mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+              mp->in = enlist(mp->in, mp->is, (size_t)1);
+              if (mp->in == NULL)
+                goto done;
+            }
+          break;
+        }
+#ifdef DEBUG
+      fprintf(stderr, " node: %d:", ri);
+      prtok(dfa->tokens[ri]);
+      fprintf(stderr, "\n in:");
+      for (i = 0; mp->in[i]; ++i)
+        fprintf(stderr, " \"%s\"", mp->in[i]);
+      fprintf(stderr, "\n is: \"%s\"\n", mp->is);
+      fprintf(stderr, " left: \"%s\"\n", mp->left);
+      fprintf(stderr, " right: \"%s\"\n", mp->right);
+#endif
+      ++mp;
+    }
+ done:
+  if (strlen(result))
+    {
+      /* Record the answer only if both allocations succeed; running
+         out of memory simply means that no must string is recorded. */
+      dm = (struct dfamust *) malloc(sizeof (struct dfamust));
+      if (dm != NULL)
+        {
+          dm->must = malloc(strlen(result) + 1);
+          if (dm->must == NULL)
+            free((char *) dm);
+          else
+            {
+              dm->exact = exact;
+              strcpy(dm->must, result);
+              dm->next = dfa->musts;
+              dfa->musts = dm;
+            }
+        }
+    }
+  /* result may point into one of the "in" lists, so the records are
+     only torn down after it has been copied. */
+  mp = musts;
+  for (i = 0; i <= dfa->tindex; ++i)
+    {
+      freelist(mp[i].in);
+      ifree((char *) mp[i].in);
+      ifree(mp[i].left);
+      ifree(mp[i].right);
+      ifree(mp[i].is);
+    }
+  free((char *) mp);
+}
diff --git a/contrib/awk/dfa.h b/contrib/awk/dfa.h
new file mode 100644
index 0000000..dda5181
--- /dev/null
+++ b/contrib/awk/dfa.h
@@ -0,0 +1,364 @@
+/* dfa.h - declarations for GNU deterministic regexp compiler
+ Copyright (C) 1988 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
+
+/* Written June, 1988 by Mike Haertel */
+
+/* FIXME:
+ 2. We should not export so much of the DFA internals.
+ In addition to clobbering modularity, we eat up valuable
+ name space. */
+
+/* Number of bits in an unsigned char. */
+#ifndef CHARBITS
+#define CHARBITS 8
+#endif
+
+/* First integer value that is greater than any character code. */
+#define NOTCHAR (1 << CHARBITS)
+
+/* INTBITS need not be exact, just a lower bound. */
+#ifndef INTBITS
+#define INTBITS (CHARBITS * sizeof (int))
+#endif
+
+/* Number of ints required to hold a bit for every character. */
+#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
+
+/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
+typedef int charclass[CHARCLASS_INTS];
+
+/* The regexp is parsed into an array of tokens in postfix form. Some tokens
+ are operators and others are terminal symbols. Most (but not all) of these
+ codes are returned by the lexical analyzer. */
+
+typedef enum
+{
+ END = -1, /* END is a terminal symbol that matches the
+ end of input; any value of END or less in
+ the parse tree is such a symbol. Accepting
+ states of the DFA are those that would have
+ a transition on END. */
+
+ /* Ordinary character values are terminal symbols that match themselves. */
+
+ EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
+ the empty string. */
+
+ BACKREF, /* BACKREF is generated by \<digit>; it
+ is not completely handled. If the scanner
+ detects a transition on backref, it returns
+ a kind of "semi-success" indicating that
+ the match will have to be verified with
+ a backtracking matcher. */
+
+ BEGLINE, /* BEGLINE is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a line. */
+
+ ENDLINE, /* ENDLINE is a terminal symbol that matches
+ the empty string if it is at the end of
+ a line. */
+
+ BEGWORD, /* BEGWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a word. */
+
+ ENDWORD, /* ENDWORD is a terminal symbol that matches
+ the empty string if it is at the end of
+ a word. */
+
+ LIMWORD, /* LIMWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ or the end of a word. */
+
+ NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
+ matches the empty string if it is not at
+ the beginning or end of a word. */
+
+ QMARK, /* QMARK is an operator of one argument that
+ matches zero or one occurrences of its
+ argument. */
+
+ STAR, /* STAR is an operator of one argument that
+ matches the Kleene closure (zero or more
+ occurrences) of its argument. */
+
+ PLUS, /* PLUS is an operator of one argument that
+ matches the positive closure (one or more
+ occurrences) of its argument. */
+
+ REPMN, /* REPMN is a lexical token corresponding
+ to the {m,n} construct. REPMN never
+ appears in the compiled token vector. */
+
+ CAT, /* CAT is an operator of two arguments that
+ matches the concatenation of its
+ arguments. CAT is never returned by the
+ lexical analyzer. */
+
+ OR, /* OR is an operator of two arguments that
+ matches either of its arguments. */
+
+ ORTOP, /* OR at the toplevel in the parse tree.
+ This is used for a boyer-moore heuristic. */
+
+ LPAREN, /* LPAREN never appears in the parse tree,
+ it is only a lexeme. */
+
+ RPAREN, /* RPAREN never appears in the parse tree. */
+
+ CSET /* CSET (and any value greater) is a
+ terminal symbol that matches any of a
+ class of characters. */
+} token;
+
+/* Sets are stored in an array in the compiled dfa; the index of the
+ array corresponding to a given set token is given by SET_INDEX(t). */
+#define SET_INDEX(t) ((t) - CSET)
+
+/* Sometimes characters can only be matched depending on the surrounding
+ context. Such context decisions depend on what the previous character
+ was, and the value of the current (lookahead) character. Context
+ dependent constraints are encoded as 8 bit integers. Each bit that
+ is set indicates that the constraint succeeds in the corresponding
+ context.
+
+ bit 7 - previous and current are newlines
+ bit 6 - previous was newline, current isn't
+ bit 5 - previous wasn't newline, current is
+ bit 4 - neither previous nor current is a newline
+ bit 3 - previous and current are word-constituents
+ bit 2 - previous was word-constituent, current isn't
+ bit 1 - previous wasn't word-constituent, current is
+ bit 0 - neither previous nor current is word-constituent
+
+ Word-constituent characters are those that satisfy isalnum().
+
+ The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
+ succeeds in a particular context. Prevn is true if the previous character
+ was a newline, currn is true if the lookahead character is a newline.
+ Prevl and currl similarly depend upon whether the previous and current
+ characters are word-constituent letters. */
+#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
+#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
+ ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
+#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
+ (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ && MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
+
+/* The following macros give information about what a constraint depends on. */
+#define PREV_NEWLINE_DEPENDENT(constraint) \
+ (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
+#define PREV_LETTER_DEPENDENT(constraint) \
+ (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
+
+/* Tokens that match the empty string subject to some constraint actually
+ work by applying that constraint to determine what may follow them,
+ taking into account what has gone before. The following values are
+ the constraints corresponding to the special tokens previously defined. */
+#define NO_CONSTRAINT 0xff
+#define BEGLINE_CONSTRAINT 0xcf
+#define ENDLINE_CONSTRAINT 0xaf
+#define BEGWORD_CONSTRAINT 0xf2
+#define ENDWORD_CONSTRAINT 0xf4
+#define LIMWORD_CONSTRAINT 0xf6
+#define NOTLIMWORD_CONSTRAINT 0xf9
+
+/* States of the recognizer correspond to sets of positions in the parse
+ tree, together with the constraints under which they may be matched.
+ So a position is encoded as an index into the parse tree together with
+ a constraint. */
+typedef struct
+{
+ unsigned index; /* Index into the parse array. */
+ unsigned constraint; /* Constraint for matching this position. */
+} position;
+
+/* Sets of positions are stored as arrays. */
+typedef struct
+{
+ position *elems; /* Elements of this position set. */
+ int nelem; /* Number of elements in this set. */
+} position_set;
+
+/* A state of the dfa consists of a set of positions, some flags,
+ and the token value of the lowest-numbered position of the state that
+ contains an END token. */
+typedef struct
+{
+ int hash; /* Hash of the positions of this state. */
+ position_set elems; /* Positions this state could match. */
+ char newline; /* True if previous state matched newline. */
+ char letter; /* True if previous state matched a letter. */
+ char backref; /* True if this state matches a \<digit>. */
+ unsigned char constraint; /* Constraint for this state to accept. */
+ int first_end; /* Token value of the first END in elems. */
+} dfa_state;
+
+/* Element of a list of strings, at least one of which is known to
+ appear in any line containing a match for the regexp. */
+struct dfamust
+{
+ int exact; /* Set by dfamust() when `must' is identical to the
+ "is" sequence computed for the whole regexp. */
+ char *must; /* malloc'd, NUL-terminated copy of the string. */
+ struct dfamust *next; /* Next element; dfamust() adds new elements
+ at the head of the list. */
+};
+
+/* A compiled regular expression. */
+struct dfa
+{
+ /* Stuff built by the scanner. */
+ charclass *charclasses; /* Array of character sets for CSET tokens. */
+ int cindex; /* Index for adding new charclasses. */
+ int calloc; /* Number of charclasses currently allocated. */
+
+ /* Stuff built by the parser. */
+ token *tokens; /* Postfix parse array. */
+ int tindex; /* Index for adding new tokens. */
+ int talloc; /* Number of tokens currently allocated. */
+ int depth; /* Depth required of an evaluation stack
+ used for depth-first traversal of the
+ parse tree. */
+ int nleaves; /* Number of leaves on the parse tree. */
+ int nregexps; /* Count of parallel regexps being built
+ with dfaparse(). */
+
+ /* Stuff owned by the state builder. */
+ dfa_state *states; /* States of the dfa. */
+ int sindex; /* Index for adding new states. */
+ int salloc; /* Number of states currently allocated. */
+
+ /* Stuff built by the structure analyzer. */
+ position_set *follows; /* Array of follow sets, indexed by position
+ index. The follow of a position is the set
+ of positions containing characters that
+ could conceivably follow a character
+ matching the given position in a string
+ matching the regexp. Allocated to the
+ maximum possible position index. */
+ int searchflag; /* True if we are supposed to build a searching
+ as opposed to an exact matcher. A searching
+ matcher finds the first and shortest string
+ matching a regexp anywhere in the buffer,
+ whereas an exact matcher finds the longest
+ string matching, but anchored to the
+ beginning of the buffer. */
+
+ /* Stuff owned by the executor. */
+ int tralloc; /* Number of transition tables that have
+ slots so far. */
+ int trcount; /* Number of transition tables that have
+ actually been built. */
+ int **trans; /* Transition tables for states that can
+ never accept. If the transitions for a
+ state have not yet been computed, or the
+ state could possibly accept, its entry in
+ this table is NULL. */
+ int **realtrans; /* Trans always points to realtrans + 1; this
+ is so trans[-1] can contain NULL. */
+ int **fails; /* Transition tables after failing to accept
+ on a state that potentially could do so. */
+ int *success; /* Table of acceptance conditions used in
+ dfaexec and computed in build_state. */
+ int *newlines; /* Transitions on newlines. The entry for a
+ newline in any transition table is always
+ -1 so we can count lines without wasting
+ too many cycles. The transition for a
+ newline is stored separately and handled
+ as a special case. Newline is also used
+ as a sentinel at the end of the buffer. */
+ struct dfamust *musts; /* List of strings, at least one of which
+ is known to appear in any r.e. matching
+ the dfa. */
+};
+
+/* Some macros for user access to dfa internals. */
+
+/* ACCEPTING returns true if s could possibly be an accepting state of r. */
+#define ACCEPTING(s, r) ((r).states[s].constraint)
+
+/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
+ specified context. */
+#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
+ SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \
+ prevn, currn, prevl, currl)
+
+/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
+ regexps that a given state could accept. Parallel regexps are numbered
+ starting at 1. */
+#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)
+
+/* Entry points. */
+
+#ifdef __STDC__
+
+/* dfasyntax() takes two arguments; the first sets the syntax bits described
+ earlier in this file, and the second sets the case-folding flag. */
+extern void dfasyntax(reg_syntax_t, int);
+
+/* Compile the given string of the given length into the given struct dfa.
+ Final argument is a flag specifying whether to build a searching or an
+ exact matcher. */
+extern void dfacomp(char *, size_t, struct dfa *, int);
+
+/* Execute the given struct dfa on the buffer of characters. The
+ first char * points to the beginning, and the second points to the
+ first character after the end of the buffer, which must be a writable
+ place so a sentinel end-of-buffer marker can be stored there. The
+ third-to-last argument is a flag telling whether to allow newlines to
+ be part of a string matching the regexp. The next-to-last argument,
+ if non-NULL, points to a place to increment every time we see a
+ newline. The final argument, if non-NULL, points to a flag that will
+ be set if further examination by a backtracking matcher is needed in
+ order to verify backreferencing; otherwise the flag will be cleared.
+ Returns NULL if no match is found, or a pointer to the first
+ character after the first & shortest matching string in the buffer. */
+extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);
+
+/* Free the storage held by the components of a struct dfa. */
+extern void dfafree(struct dfa *);
+
+/* Entry points for people who know what they're doing. */
+
+/* Initialize the components of a struct dfa. */
+extern void dfainit(struct dfa *);
+
+/* Incrementally parse a string of given length into a struct dfa. */
+extern void dfaparse(char *, size_t, struct dfa *);
+
+/* Analyze a parsed regexp; second argument tells whether to build a searching
+ or an exact matcher. */
+extern void dfaanalyze(struct dfa *, int);
+
+/* Compute, for each possible character, the transitions out of a given
+ state, storing them in an array of integers. */
+extern void dfastate(int, struct dfa *, int []);
+
+/* Error handling. */
+
+/* dfaerror() is called by the regexp routines whenever an error occurs. It
+ takes a single argument, a NUL-terminated string describing the error.
+ The default dfaerror() prints the error message to stderr and exits.
+ The user can provide a different dfaerror() if so desired. */
+extern void dfaerror(const char *);
+
+#else /* ! __STDC__ */
+extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
+extern void dfaanalyze(), dfastate(), dfaerror();
+extern char *dfaexec();
+#endif /* ! __STDC__ */
diff --git a/contrib/awk/doc/ChangeLog b/contrib/awk/doc/ChangeLog
new file mode 100644
index 0000000..660436a
--- /dev/null
+++ b/contrib/awk/doc/ChangeLog
@@ -0,0 +1,91 @@
+Thu May 15 12:49:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.3: Release tar file made.
+
+Fri Apr 18 07:55:47 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * BETA Release 3.0.34: Release tar file made.
+
+Sun Apr 13 15:39:20 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in ($(infodir)/gawk.info): exit 0 in case install-info
+ fails.
+
+Thu Jan 2 23:17:53 1997 Fred Fish <fnf@ninemoons.com>
+
+ * Makefile.in (awkcard.tr): Use ':' chars to separate parts of
+ sed command, since $(srcdir) may expand to something with '/'
+ characters in it, which confuses sed terribly.
+ * gawk.texi (Amiga Installation): Note change of configuration
+ from "m68k-cbm-amigados" to "m68k-amigaos". Point ftp users
+ towards current ADE distribution and not obsolete Aminet
+ "gcc" distribution. Change "FreshFish" to "Geek Gadgets".
+
+Wed Dec 25 11:25:22 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.2: Release tar file made.
+
+Wed Dec 25 11:17:32 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in ($(mandir)/igawk$(manext),$(mandir)/gawk$(manext)):
+ remove chmod command; let $(INSTALL_DATA) use -m.
+
+Tue Dec 17 22:38:28 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (gawk.info,gawk.dvi,postscript): run makeinfo, TeX,
+ and/or troff against files in $(srcdir). Thanks to Ulrich Drepper.
+ ($(infodir)/gawk.info): use --info-dir to install-info, not
+ --infodir.
+
+Tue Dec 10 23:09:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.1: Release tar file made.
+
+Mon Dec 9 12:48:54 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * no.colors: new file from Michal for old troffs.
+ * Makefile.in [AWKCARD]: changes to parameterize old/new troff.
+
+Sun Dec 1 15:04:56 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * texinfo.tex: Updated to version 2.193, from Karl Berry.
+
+Tue Nov 26 22:57:15 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in ($(infodir)/gawk.info): Change option in call
+ to `install-info' to `--info-dir' from `--infodir'.
+
+Mon Nov 4 13:30:39 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in: updates for reference card.
+ (ad.block, awkcard.in, cardfonts, colors, macros, setter.outline):
+ new files for reference card.
+
+Wed Oct 16 12:43:02 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * texinfo.tex: Updated to version 2.185, from texinfo-3.9 dist.
+
+Sun Aug 11 23:12:08 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in ($(infodir)/gawk.info): correct use of
+ $(INSTALL_DATA) and remove chmod command.
+
+Thu Jul 11 22:06:50 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in ($(mandir)/gawk.$(ext), $(mandir)/igawk.$(ext)):
+ made dependent on files in $(srcdir).
+
+Fri Mar 15 06:45:35 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (clean): add `*~' to list of files to be removed.
+
+Thu Jan 25 23:40:15 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (dvi): run texindex and tex an extra time.
+ This gets the cross references right. Sigh.
+
+Wed Jan 24 11:51:54 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (maintainer-clean):
+ Depend on distclean, not the other way around.
+ Output warning message as per GNU standards.
diff --git a/contrib/awk/doc/awk.1 b/contrib/awk/doc/awk.1
new file mode 100644
index 0000000..0568c16
--- /dev/null
+++ b/contrib/awk/doc/awk.1
@@ -0,0 +1,2621 @@
+.ds PX \s-1POSIX\s+1
+.ds UX \s-1UNIX\s+1
+.ds AN \s-1ANSI\s+1
+.TH GAWK 1 "Dec 19 1996" "Free Software Foundation" "Utility Commands"
+.SH NAME
+gawk \- pattern scanning and processing language
+.SH SYNOPSIS
+.B gawk
+[ POSIX or GNU style options ]
+.B \-f
+.I program-file
+[
+.B \-\^\-
+] file .\^.\^.
+.br
+.B gawk
+[ POSIX or GNU style options ]
+[
+.B \-\^\-
+]
+.I program-text
+file .\^.\^.
+.SH DESCRIPTION
+.I Gawk
+is the GNU Project's implementation of the AWK programming language.
+It conforms to the definition of the language in
+the \*(PX 1003.2 Command Language And Utilities Standard.
+This version in turn is based on the description in
+.IR "The AWK Programming Language" ,
+by Aho, Kernighan, and Weinberger,
+with the additional features found in the System V Release 4 version
+of \*(UX
+.IR awk .
+.I Gawk
+also provides more recent Bell Labs
+.I awk
+extensions, and some GNU-specific extensions.
+.PP
+The command line consists of options to
+.I gawk
+itself, the AWK program text (if not supplied via the
+.B \-f
+or
+.B \-\^\-file
+options), and values to be made
+available in the
+.B ARGC
+and
+.B ARGV
+pre-defined AWK variables.
+.SH OPTION FORMAT
+.PP
+.I Gawk
+options may be either the traditional \*(PX one letter options,
+or the GNU style long options. \*(PX options start with a single ``\-'',
+while long options start with ``\-\^\-''.
+Long options are provided for both GNU-specific features and
+for \*(PX mandated features.
+.PP
+Following the \*(PX standard,
+.IR gawk -specific
+options are supplied via arguments to the
+.B \-W
+option. Multiple
+.B \-W
+options may be supplied.
+Each
+.B \-W
+option has a corresponding long option, as detailed below.
+Arguments to long options are either joined with the option
+by an
+.B =
+sign, with no intervening spaces, or they may be provided in the
+next command line argument.
+Long options may be abbreviated, as long as the abbreviation
+remains unique.
+.SH OPTIONS
+.PP
+.I Gawk
+accepts the following options.
+.TP
+.PD 0
+.BI \-F " fs"
+.TP
+.PD
+.BI \-\^\-field-separator " fs"
+Use
+.I fs
+for the input field separator (the value of the
+.B FS
+predefined
+variable).
+.TP
+.PD 0
+\fB\-v\fI var\fB\^=\^\fIval\fR
+.TP
+.PD
+\fB\-\^\-assign \fIvar\fB\^=\^\fIval\fR
+Assign the value
+.IR val ,
+to the variable
+.IR var ,
+before execution of the program begins.
+Such variable values are available to the
+.B BEGIN
+block of an AWK program.
+.TP
+.PD 0
+.BI \-f " program-file"
+.TP
+.PD
+.BI \-\^\-file " program-file"
+Read the AWK program source from the file
+.IR program-file ,
+instead of from the first command line argument.
+Multiple
+.B \-f
+(or
+.BR \-\^\-file )
+options may be used.
+.TP
+.PD 0
+.BI \-mf " NNN"
+.TP
+.PD
+.BI \-mr " NNN"
+Set various memory limits to the value
+.IR NNN .
+The
+.B f
+flag sets the maximum number of fields, and the
+.B r
+flag sets the maximum record size. These two flags and the
+.B \-m
+option are from the Bell Labs research version of \*(UX
+.IR awk .
+They are ignored by
+.IR gawk ,
+since
+.I gawk
+has no pre-defined limits.
+.TP
+.PD 0
+.B "\-W traditional"
+.TP
+.PD 0
+.B "\-W compat"
+.TP
+.PD 0
+.B \-\^\-traditional
+.TP
+.PD
+.B \-\^\-compat
+Run in
+.I compatibility
+mode. In compatibility mode,
+.I gawk
+behaves identically to \*(UX
+.IR awk ;
+none of the GNU-specific extensions are recognized.
+The use of
+.B \-\^\-traditional
+is preferred over the other forms of this option.
+See
+.BR "GNU EXTENSIONS" ,
+below, for more information.
+.TP
+.PD 0
+.B "\-W copyleft"
+.TP
+.PD 0
+.B "\-W copyright"
+.TP
+.PD 0
+.B \-\^\-copyleft
+.TP
+.PD
+.B \-\^\-copyright
+Print the short version of the GNU copyright information message on
+the standard output, and exit successfully.
+.TP
+.PD 0
+.B "\-W help"
+.TP
+.PD 0
+.B "\-W usage"
+.TP
+.PD 0
+.B \-\^\-help
+.TP
+.PD
+.B \-\^\-usage
+Print a relatively short summary of the available options on
+the standard output.
+(Per the
+.IR "GNU Coding Standards" ,
+these options cause an immediate, successful exit.)
+.TP
+.PD 0
+.B "\-W lint"
+.TP
+.PD
+.B \-\^\-lint
+Provide warnings about constructs that are
+dubious or non-portable to other AWK implementations.
+.TP
+.PD 0
+.B "\-W lint\-old"
+.TP
+.PD
+.B \-\^\-lint\-old
+Provide warnings about constructs that are
+not portable to the original version of Unix
+.IR awk .
+.ig
+.\" This option is left undocumented, on purpose.
+.TP
+.PD 0
+.B "\-W nostalgia"
+.TP
+.PD
+.B \-\^\-nostalgia
+Provide a moment of nostalgia for long time
+.I awk
+users.
+..
+.TP
+.PD 0
+.B "\-W posix"
+.TP
+.PD
+.B \-\^\-posix
+This turns on
+.I compatibility
+mode, with the following additional restrictions:
+.RS
+.TP \w'\(bu'u+1n
+\(bu
+.B \ex
+escape sequences are not recognized.
+.TP
+\(bu
+Only space and tab act as field separators when
+.B FS
+is set to a single space, newline does not.
+.TP
+\(bu
+The synonym
+.B func
+for the keyword
+.B function
+is not recognized.
+.TP
+\(bu
+The operators
+.B **
+and
+.B **=
+cannot be used in place of
+.B ^
+and
+.BR ^= .
+.TP
+\(bu
+The
+.B fflush()
+function is not available.
+.RE
+.TP
+.PD 0
+.B "\-W re\-interval"
+.TP
+.PD
+.B \-\^\-re\-interval
+Enable the use of
+.I "interval expressions"
+in regular expression matching
+(see
+.BR "Regular Expressions" ,
+below).
+Interval expressions were not traditionally available in the
+AWK language. The POSIX standard added them, to make
+.I awk
+and
+.I egrep
+consistent with each other.
+However, their use is likely
+to break old AWK programs, so
+.I gawk
+only provides them if they are requested with this option, or when
+.B \-\^\-posix
+is specified.
+.TP
+.PD 0
+.BI "\-W source " program-text
+.TP
+.PD
+.BI \-\^\-source " program-text"
+Use
+.I program-text
+as AWK program source code.
+This option allows the easy intermixing of library functions (used via the
+.B \-f
+and
+.B \-\^\-file
+options) with source code entered on the command line.
+It is intended primarily for medium to large AWK programs used
+in shell scripts.
+.TP
+.PD 0
+.B "\-W version"
+.TP
+.PD
+.B \-\^\-version
+Print version information for this particular copy of
+.I gawk
+on the standard output.
+This is useful mainly for knowing if the current copy of
+.I gawk
+on your system
+is up to date with respect to whatever the Free Software Foundation
+is distributing.
+This is also useful when reporting bugs.
+(Per the
+.IR "GNU Coding Standards" ,
+these options cause an immediate, successful exit.)
+.TP
+.B \-\^\-
+Signal the end of options. This is useful to allow further arguments to the
+AWK program itself to start with a ``\-''.
+This is mainly for consistency with the argument parsing convention used
+by most other \*(PX programs.
+.PP
+In compatibility mode,
+any other options are flagged as illegal, but are otherwise ignored.
+In normal operation, as long as program text has been supplied, unknown
+options are passed on to the AWK program in the
+.B ARGV
+array for processing. This is particularly useful for running AWK
+programs via the ``#!'' executable interpreter mechanism.
+.SH AWK PROGRAM EXECUTION
+.PP
+An AWK program consists of a sequence of pattern-action statements
+and optional function definitions.
+.RS
+.PP
+\fIpattern\fB { \fIaction statements\fB }\fR
+.br
+\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR
+.RE
+.PP
+.I Gawk
+first reads the program source from the
+.IR program-file (s)
+if specified,
+from arguments to
+.BR \-\^\-source ,
+or from the first non-option argument on the command line.
+The
+.B \-f
+and
+.B \-\^\-source
+options may be used multiple times on the command line.
+.I Gawk
+will read the program text as if all the
+.IR program-file s
+and command line source texts
+had been concatenated together. This is useful for building libraries
+of AWK functions, without having to include them in each new AWK
+program that uses them. It also provides the ability to mix library
+functions with command line programs.
+.PP
+The environment variable
+.B AWKPATH
+specifies a search path to use when finding source files named with
+the
+.B \-f
+option. If this variable does not exist, the default path is
+\fB".:/usr/local/share/awk"\fR.
+(The actual directory may vary, depending upon how
+.I gawk
+was built and installed.)
+If a file name given to the
+.B \-f
+option contains a ``/'' character, no path search is performed.
+.PP
+.I Gawk
+executes AWK programs in the following order.
+First,
+all variable assignments specified via the
+.B \-v
+option are performed.
+Next,
+.I gawk
+compiles the program into an internal form.
+Then,
+.I gawk
+executes the code in the
+.B BEGIN
+block(s) (if any),
+and then proceeds to read
+each file named in the
+.B ARGV
+array.
+If there are no files named on the command line,
+.I gawk
+reads the standard input.
+.PP
+If a filename on the command line has the form
+.IB var = val
+it is treated as a variable assignment. The variable
+.I var
+will be assigned the value
+.IR val .
+(This happens after any
+.B BEGIN
+block(s) have been run.)
+Command line variable assignment
+is most useful for dynamically assigning values to the variables
+AWK uses to control how input is broken into fields and records. It
+is also useful for controlling state if multiple passes are needed over
+a single data file.
+.PP
+If the value of a particular element of
+.B ARGV
+is empty (\fB""\fR),
+.I gawk
+skips over it.
+.PP
+For each record in the input,
+.I gawk
+tests to see if it matches any
+.I pattern
+in the AWK program.
+For each pattern that the record matches, the associated
+.I action
+is executed.
+The patterns are tested in the order they occur in the program.
+.PP
+Finally, after all the input is exhausted,
+.I gawk
+executes the code in the
+.B END
+block(s) (if any).
+.SH VARIABLES, RECORDS AND FIELDS
+AWK variables are dynamic; they come into existence when they are
+first used. Their values are either floating-point numbers or strings,
+or both,
+depending upon how they are used. AWK also has one dimensional
+arrays; arrays with multiple dimensions may be simulated.
+Several pre-defined variables are set as a program
+runs; these will be described as needed and summarized below.
+.SS Records
+Normally, records are separated by newline characters. You can control how
+records are separated by assigning values to the built-in variable
+.BR RS .
+If
+.B RS
+is any single character, that character separates records.
+Otherwise,
+.B RS
+is a regular expression. Text in the input that matches this
+regular expression will separate the record.
+However, in compatibility mode,
+only the first character of its string
+value is used for separating records.
+If
+.B RS
+is set to the null string, then records are separated by
+blank lines.
+When
+.B RS
+is set to the null string, the newline character always acts as
+a field separator, in addition to whatever value
+.B FS
+may have.
+.SS Fields
+.PP
+As each input record is read,
+.I gawk
+splits the record into
+.IR fields ,
+using the value of the
+.B FS
+variable as the field separator.
+If
+.B FS
+is a single character, fields are separated by that character.
+If
+.B FS
+is the null string, then each individual character becomes a
+separate field.
+Otherwise,
+.B FS
+is expected to be a full regular expression.
+In the special case that
+.B FS
+is a single space, fields are separated
+by runs of spaces and/or tabs and/or newlines.
+(But see the discussion of
+.BR \-\-posix ,
+below).
+Note that the value of
+.B IGNORECASE
+(see below) will also affect how fields are split when
+.B FS
+is a regular expression, and how records are separated when
+.B RS
+is a regular expression.
+.PP
+If the
+.B FIELDWIDTHS
+variable is set to a space separated list of numbers, each field is
+expected to have fixed width, and
+.I gawk
+will split up the record using the specified widths. The value of
+.B FS
+is ignored.
+Assigning a new value to
+.B FS
+overrides the use of
+.BR FIELDWIDTHS ,
+and restores the default behavior.
+.PP
+Each field in the input record may be referenced by its position,
+.BR $1 ,
+.BR $2 ,
+and so on.
+.B $0
+is the whole record. The value of a field may be assigned to as well.
+Fields need not be referenced by constants:
+.RS
+.PP
+.ft B
+n = 5
+.br
+print $n
+.ft R
+.RE
+.PP
+prints the fifth field in the input record.
+The variable
+.B NF
+is set to the total number of fields in the input record.
+.PP
+References to non-existent fields (i.e. fields after
+.BR $NF )
+produce the null-string. However, assigning to a non-existent field
+(e.g.,
+.BR "$(NF+2) = 5" )
+will increase the value of
+.BR NF ,
+create any intervening fields with the null string as their value, and
+cause the value of
+.B $0
+to be recomputed, with the fields being separated by the value of
+.BR OFS .
+References to negative numbered fields cause a fatal error.
+Decrementing
+.B NF
+causes the values of fields past the new value to be lost, and the value of
+.B $0
+to be recomputed, with the fields being separated by the value of
+.BR OFS .
+.SS Built-in Variables
+.PP
+.IR Gawk 's
+built-in variables are:
+.PP
+.TP \w'\fBFIELDWIDTHS\fR'u+1n
+.B ARGC
+The number of command line arguments (does not include options to
+.IR gawk ,
+or the program source).
+.TP
+.B ARGIND
+The index in
+.B ARGV
+of the current file being processed.
+.TP
+.B ARGV
+Array of command line arguments. The array is indexed from
+0 to
+.B ARGC
+\- 1.
+Dynamically changing the contents of
+.B ARGV
+can control the files used for data.
+.TP
+.B CONVFMT
+The conversion format for numbers, \fB"%.6g"\fR, by default.
+.TP
+.B ENVIRON
+An array containing the values of the current environment.
+The array is indexed by the environment variables, each element being
+the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be
+.BR /home/arnold ).
+Changing this array does not affect the environment seen by programs which
+.I gawk
+spawns via redirection or the
+.B system()
+function.
+(This may change in a future version of
+.IR gawk .)
+.\" but don't hold your breath...
+.TP
+.B ERRNO
+If a system error occurs either doing a redirection for
+.BR getline ,
+during a read for
+.BR getline ,
+or during a
+.BR close() ,
+then
+.B ERRNO
+will contain
+a string describing the error.
+.TP
+.B FIELDWIDTHS
+A white-space separated list of field widths. When set,
+.I gawk
+parses the input into fields of fixed width, instead of using the
+value of the
+.B FS
+variable as the field separator.
+The fixed field width facility is still experimental; the
+semantics may change as
+.I gawk
+evolves over time.
+.TP
+.B FILENAME
+The name of the current input file.
+If no files are specified on the command line, the value of
+.B FILENAME
+is ``\-''.
+However,
+.B FILENAME
+is undefined inside the
+.B BEGIN
+block.
+.TP
+.B FNR
+The input record number in the current input file.
+.TP
+.B FS
+The input field separator, a space by default. See
+.BR Fields ,
+above.
+.TP
+.B IGNORECASE
+Controls the case-sensitivity of all regular expression
+and string operations. If
+.B IGNORECASE
+has a non-zero value, then string comparisons and
+pattern matching in rules,
+field splitting with
+.BR FS ,
+record separating with
+.BR RS ,
+regular expression
+matching with
+.B ~
+and
+.BR !~ ,
+and the
+.BR gensub() ,
+.BR gsub() ,
+.BR index() ,
+.BR match() ,
+.BR split() ,
+and
+.B sub()
+pre-defined functions will all ignore case when doing regular expression
+operations. Thus, if
+.B IGNORECASE
+is not equal to zero,
+.B /aB/
+matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP,
+and \fB"AB"\fP.
+As with all AWK variables, the initial value of
+.B IGNORECASE
+is zero, so all regular expression and string
+operations are normally case-sensitive.
+Under Unix, the full ISO 8859-1 Latin-1 character set is used
+when ignoring case.
+.B NOTE:
+In versions of
+.I gawk
+prior to 3.0,
+.B IGNORECASE
+only affected regular expression operations. It now affects string
+comparisons as well.
+.TP
+.B NF
+The number of fields in the current input record.
+.TP
+.B NR
+The total number of input records seen so far.
+.TP
+.B OFMT
+The output format for numbers, \fB"%.6g"\fR, by default.
+.TP
+.B OFS
+The output field separator, a space by default.
+.TP
+.B ORS
+The output record separator, by default a newline.
+.TP
+.B RS
+The input record separator, by default a newline.
+.TP
+.B RT
+The record terminator.
+.I Gawk
+sets
+.B RT
+to the input text that matched the character or regular expression
+specified by
+.BR RS .
+.TP
+.B RSTART
+The index of the first character matched by
+.BR match() ;
+0 if no match.
+.TP
+.B RLENGTH
+The length of the string matched by
+.BR match() ;
+\-1 if no match.
+.TP
+.B SUBSEP
+The character used to separate multiple subscripts in array
+elements, by default \fB"\e034"\fR.
+.SS Arrays
+.PP
+Arrays are subscripted with an expression between square brackets
+.RB ( [ " and " ] ).
+If the expression is an expression list
+.RI ( expr ", " expr " ...)"
+then the array subscript is a string consisting of the
+concatenation of the (string) value of each expression,
+separated by the value of the
+.B SUBSEP
+variable.
+This facility is used to simulate multiply dimensioned
+arrays. For example:
+.PP
+.RS
+.ft B
+i = "A";\^ j = "B";\^ k = "C"
+.br
+x[i, j, k] = "hello, world\en"
+.ft R
+.RE
+.PP
+assigns the string \fB"hello, world\en"\fR to the element of the array
+.B x
+which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK
+are associative, i.e. indexed by string values.
+.PP
+The special operator
+.B in
+may be used in an
+.B if
+or
+.B while
+statement to see if an array has an index consisting of a particular
+value.
+.PP
+.RS
+.ft B
+.nf
+if (val in array)
+ print array[val]
+.fi
+.ft
+.RE
+.PP
+If the array has multiple subscripts, use
+.BR "(i, j) in array" .
+.PP
+The
+.B in
+construct may also be used in a
+.B for
+loop to iterate over all the elements of an array.
+.PP
+An element may be deleted from an array using the
+.B delete
+statement.
+The
+.B delete
+statement may also be used to delete the entire contents of an array,
+just by specifying the array name without a subscript.
+.SS Variable Typing And Conversion
+.PP
+Variables and fields
+may be (floating point) numbers, or strings, or both. How the
+value of a variable is interpreted depends upon its context. If used in
+a numeric expression, it will be treated as a number, if used as a string
+it will be treated as a string.
+.PP
+To force a variable to be treated as a number, add 0 to it; to force it
+to be treated as a string, concatenate it with the null string.
+.PP
+When a string must be converted to a number, the conversion is accomplished
+using
+.IR atof (3).
+A number is converted to a string by using the value of
+.B CONVFMT
+as a format string for
+.IR sprintf (3),
+with the numeric value of the variable as the argument.
+However, even though all numbers in AWK are floating-point,
+integral values are
+.I always
+converted as integers. Thus, given
+.PP
+.RS
+.ft B
+.nf
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+.fi
+.ft R
+.RE
+.PP
+the variable
+.B b
+has a string value of \fB"12"\fR and not \fB"12.00"\fR.
+.PP
+.I Gawk
+performs comparisons as follows:
+If two variables are numeric, they are compared numerically.
+If one value is numeric and the other has a string value that is a
+``numeric string,'' then comparisons are also done numerically.
+Otherwise, the numeric value is converted to a string and a string
+comparison is performed.
+Two strings are compared, of course, as strings.
+According to the \*(PX standard, even if two strings are
+numeric strings, a numeric comparison is performed. However, this is
+clearly incorrect, and
+.I gawk
+does not do this.
+.PP
+Note that string constants, such as \fB"57"\fP, are
+.I not
+numeric strings, they are string constants. The idea of ``numeric string''
+only applies to fields,
+.B getline
+input,
+.BR FILENAME ,
+.B ARGV
+elements,
+.B ENVIRON
+elements and the elements of an array created by
+.B split()
+that are numeric strings.
+The basic idea is that
+.IR "user input" ,
+and only user input, that looks numeric,
+should be treated that way.
+.PP
+Uninitialized variables have the numeric value 0 and the string value ""
+(the null, or empty, string).
+.SH PATTERNS AND ACTIONS
+AWK is a line oriented language. The pattern comes first, and then the
+action. Action statements are enclosed in
+.B {
+and
+.BR } .
+Either the pattern may be missing, or the action may be missing, but,
+of course, not both. If the pattern is missing, the action will be
+executed for every single record of input.
+A missing action is equivalent to
+.RS
+.PP
+.B "{ print }"
+.RE
+.PP
+which prints the entire record.
+.PP
+Comments begin with the ``#'' character, and continue until the
+end of the line.
+Blank lines may be used to separate statements.
+Normally, a statement ends with a newline; however, this is not the
+case for lines ending in
+a ``,'',
+.BR { ,
+.BR ? ,
+.BR : ,
+.BR && ,
+or
+.BR || .
+Lines ending in
+.B do
+or
+.B else
+also have their statements automatically continued on the following line.
+In other cases, a line can be continued by ending it with a ``\e'',
+in which case the newline will be ignored.
+.PP
+Multiple statements may
+be put on one line by separating them with a ``;''.
+This applies to both the statements within the action part of a
+pattern-action pair (the usual case),
+and to the pattern-action statements themselves.
+.SS Patterns
+AWK patterns may be one of the following:
+.PP
+.RS
+.nf
+.B BEGIN
+.B END
+.BI / "regular expression" /
+.I "relational expression"
+.IB pattern " && " pattern
+.IB pattern " || " pattern
+.IB pattern " ? " pattern " : " pattern
+.BI ( pattern )
+.BI ! " pattern"
+.IB pattern1 ", " pattern2
+.fi
+.RE
+.PP
+.B BEGIN
+and
+.B END
+are two special kinds of patterns which are not tested against
+the input.
+The action parts of all
+.B BEGIN
+patterns are merged as if all the statements had
+been written in a single
+.B BEGIN
+block. They are executed before any
+of the input is read. Similarly, all the
+.B END
+blocks are merged,
+and executed when all the input is exhausted (or when an
+.B exit
+statement is executed).
+.B BEGIN
+and
+.B END
+patterns cannot be combined with other patterns in pattern expressions.
+.B BEGIN
+and
+.B END
+patterns cannot have missing action parts.
+.PP
+For
+.BI / "regular expression" /
+patterns, the associated statement is executed for each input record that matches
+the regular expression.
+Regular expressions are the same as those in
+.IR egrep (1),
+and are summarized below.
+.PP
+A
+.I "relational expression"
+may use any of the operators defined below in the section on actions.
+These generally test whether certain fields match certain regular expressions.
+.PP
+The
+.BR && ,
+.BR || ,
+and
+.B !
+operators are logical AND, logical OR, and logical NOT, respectively, as in C.
+They do short-circuit evaluation, also as in C, and are used for combining
+more primitive pattern expressions. As in most languages, parentheses
+may be used to change the order of evaluation.
+.PP
+The
+.B ?\^:
+operator is like the same operator in C. If the first pattern is true
+then the pattern used for testing is the second pattern, otherwise it is
+the third. Only one of the second and third patterns is evaluated.
+.PP
+The
+.IB pattern1 ", " pattern2
+form of an expression is called a
+.IR "range pattern" .
+It matches all input records starting with a record that matches
+.IR pattern1 ,
+and continuing until a record that matches
+.IR pattern2 ,
+inclusive. It does not combine with any other sort of pattern expression.
+.SS Regular Expressions
+Regular expressions are the extended kind found in
+.IR egrep .
+They are composed of characters as follows:
+.TP \w'\fB[^\fIabc...\fB]\fR'u+2n
+.I c
+matches the non-metacharacter
+.IR c .
+.TP
+.I \ec
+matches the literal character
+.IR c .
+.TP
+.B .
+matches any character
+.I including
+newline.
+.TP
+.B ^
+matches the beginning of a string.
+.TP
+.B $
+matches the end of a string.
+.TP
+.BI [ abc... ]
+character list, matches any of the characters
+.IR abc... .
+.TP
+.BI [^ abc... ]
+negated character list, matches any character except
+.IR abc... .
+.TP
+.IB r1 | r2
+alternation: matches either
+.I r1
+or
+.IR r2 .
+.TP
+.I r1r2
+concatenation: matches
+.IR r1 ,
+and then
+.IR r2 .
+.TP
+.IB r +
+matches one or more
+.IR r 's.
+.TP
+.IB r *
+matches zero or more
+.IR r 's.
+.TP
+.IB r ?
+matches zero or one
+.IR r 's.
+.TP
+.BI ( r )
+grouping: matches
+.IR r .
+.TP
+.PD 0
+.IB r { n }
+.TP
+.PD 0
+.IB r { n ,}
+.TP
+.PD
+.IB r { n , m }
+One or two numbers inside braces denote an
+.IR "interval expression" .
+If there is one number in the braces, the preceding regexp
+.I r
+is repeated
+.I n
+times. If there are two numbers separated by a comma,
+.I r
+is repeated
+.I n
+to
+.I m
+times.
+If there is one number followed by a comma, then
+.I r
+is repeated at least
+.I n
+times.
+.sp .5
+Interval expressions are only available if either
+.B \-\^\-posix
+or
+.B \-\^\-re\-interval
+is specified on the command line.
+.TP
+.B \ey
+matches the empty string at either the beginning or the
+end of a word.
+.TP
+.B \eB
+matches the empty string within a word.
+.TP
+.B \e<
+matches the empty string at the beginning of a word.
+.TP
+.B \e>
+matches the empty string at the end of a word.
+.TP
+.B \ew
+matches any word-constituent character (letter, digit, or underscore).
+.TP
+.B \eW
+matches any character that is not word-constituent.
+.TP
+.B \e`
+matches the empty string at the beginning of a buffer (string).
+.TP
+.B \e'
+matches the empty string at the end of a buffer.
+.PP
+The escape sequences that are valid in string constants (see below)
+are also legal in regular expressions.
+.PP
+.I "Character classes"
+are a new feature introduced in the POSIX standard.
+A character class is a special notation for describing
+lists of characters that have a specific attribute, but where the
+actual characters themselves can vary from country to country and/or
+from character set to character set. For example, the notion of what
+is an alphabetic character differs in the USA and in France.
+.PP
+A character class is only valid in a regexp
+.I inside
+the brackets of a character list. Character classes consist of
+.BR [: ,
+a keyword denoting the class, and
+.BR :] .
+Here are the character
+classes defined by the POSIX standard.
+.TP
+.B [:alnum:]
+Alphanumeric characters.
+.TP
+.B [:alpha:]
+Alphabetic characters.
+.TP
+.B [:blank:]
+Space or tab characters.
+.TP
+.B [:cntrl:]
+Control characters.
+.TP
+.B [:digit:]
+Numeric characters.
+.TP
+.B [:graph:]
+Characters that are both printable and visible.
+(A space is printable, but not visible, while an
+.B a
+is both.)
+.TP
+.B [:lower:]
+Lower-case alphabetic characters.
+.TP
+.B [:print:]
+Printable characters (characters that are not control characters).
+.TP
+.B [:punct:]
+Punctuation characters (characters that are not letters, digits,
+control characters, or space characters).
+.TP
+.B [:space:]
+Space characters (such as space, tab, and formfeed, to name a few).
+.TP
+.B [:upper:]
+Upper-case alphabetic characters.
+.TP
+.B [:xdigit:]
+Characters that are hexadecimal digits.
+.PP
+For example, before the POSIX standard, to match alphanumeric
+characters, you would have had to write
+.BR /[A\-Za\-z0\-9]/ .
+If your character set had other alphabetic characters in it, this would not
+match them. With the POSIX character classes, you can write
+.BR /[[:alnum:]]/ ,
+and this will match
+.I all
+the alphabetic and numeric characters in your character set.
+.PP
+Two additional special sequences can appear in character lists.
+These apply to non-ASCII character sets, which can have single symbols
+(called
+.IR "collating elements" )
+that are represented with more than one
+character, as well as several characters that are equivalent for
+.IR collating ,
+or sorting, purposes. (E.g., in French, a plain ``e''
+and a grave-accented e\` are equivalent.)
+.TP
+Collating Symbols
+A collating symbol is a multi-character collating element enclosed in
+.B [.
+and
+.BR .] .
+For example, if
+.B ch
+is a collating element, then
+.B [[.ch.]]
+is a regexp that matches this collating element, while
+.B [ch]
+is a regexp that matches either
+.B c
+or
+.BR h .
+.TP
+Equivalence Classes
+An equivalence class is a locale-specific name for a list of
+characters that are equivalent. The name is enclosed in
+.B [=
+and
+.BR =] .
+For example, the name
+.B e
+might be used to represent all of
+``e,'' ``e\','' and ``e\`.''
+In this case,
+.B [[=e=]]
+is a regexp
+that matches any of
+.BR e ,
+.BR e\' ,
+or
+.BR e\` .
+.PP
+These features are very valuable in non-English speaking locales.
+The library functions that
+.I gawk
+uses for regular expression matching
+currently only recognize POSIX character classes; they do not recognize
+collating symbols or equivalence classes.
+.PP
+The
+.BR \ey ,
+.BR \eB ,
+.BR \e< ,
+.BR \e> ,
+.BR \ew ,
+.BR \eW ,
+.BR \e` ,
+and
+.B \e'
+operators are specific to
+.IR gawk ;
+they are extensions based on facilities in the GNU regexp libraries.
+.PP
+The various command line options
+control how
+.I gawk
+interprets characters in regexps.
+.TP
+No options
+In the default case,
+.I gawk
+provides all the facilities of
+POSIX regexps and the GNU regexp operators described above.
+However, interval expressions are not supported.
+.TP
+.B \-\^\-posix
+Only POSIX regexps are supported, the GNU operators are not special.
+(E.g.,
+.B \ew
+matches a literal
+.BR w ).
+Interval expressions are allowed.
+.TP
+.B \-\^\-traditional
+Traditional Unix
+.I awk
+regexps are matched. The GNU operators
+are not special, interval expressions are not available, and neither
+are the POSIX character classes
+.RB ( [[:alnum:]]
+and so on).
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+.TP
+.B \-\^\-re\-interval
+Allow interval expressions in regexps, even if
+.B \-\^\-traditional
+has been provided.
+.SS Actions
+Action statements are enclosed in braces,
+.B {
+and
+.BR } .
+Action statements consist of the usual assignment, conditional, and looping
+statements found in most languages. The operators, control statements,
+and input/output statements
+available are patterned after those in C.
+.SS Operators
+.PP
+The operators in AWK, in order of decreasing precedence, are
+.PP
+.TP "\w'\fB*= /= %= ^=\fR'u+1n"
+.BR ( \&... )
+Grouping
+.TP
+.B $
+Field reference.
+.TP
+.B "++ \-\^\-"
+Increment and decrement, both prefix and postfix.
+.TP
+.B ^
+Exponentiation (\fB**\fR may also be used, and \fB**=\fR for
+the assignment operator).
+.TP
+.B "+ \- !"
+Unary plus, unary minus, and logical negation.
+.TP
+.B "* / %"
+Multiplication, division, and modulus.
+.TP
+.B "+ \-"
+Addition and subtraction.
+.TP
+.I space
+String concatenation.
+.TP
+.PD 0
+.B "< >"
+.TP
+.PD 0
+.B "<= >="
+.TP
+.PD
+.B "!= =="
+The regular relational operators.
+.TP
+.B "~ !~"
+Regular expression match, negated match.
+.B NOTE:
+Do not use a constant regular expression
+.RB ( /foo/ )
+on the left-hand side of a
+.B ~
+or
+.BR !~ .
+Only use one on the right-hand side. The expression
+.BI "/foo/ ~ " exp
+has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR.
+This is usually
+.I not
+what was intended.
+.TP
+.B in
+Array membership.
+.TP
+.B &&
+Logical AND.
+.TP
+.B ||
+Logical OR.
+.TP
+.B ?:
+The C conditional expression. This has the form
+.IB expr1 " ? " expr2 " : " expr3\c
+\&. If
+.I expr1
+is true, the value of the expression is
+.IR expr2 ,
+otherwise it is
+.IR expr3 .
+Only one of
+.I expr2
+and
+.I expr3
+is evaluated.
+.TP
+.PD 0
+.B "= += \-="
+.TP
+.PD
+.B "*= /= %= ^="
+Assignment. Both absolute assignment
+.BI ( var " = " value )
+and operator-assignment (the other forms) are supported.
+.SS Control Statements
+.PP
+The control statements are
+as follows:
+.PP
+.RS
+.nf
+\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR]
+\fBwhile (\fIcondition\fB) \fIstatement \fR
+\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR
+\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR
+\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR
+\fBbreak\fR
+\fBcontinue\fR
+\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR
+\fBdelete \fIarray\^\fR
+\fBexit\fR [ \fIexpression\fR ]
+\fB{ \fIstatements \fB}\fR
+.fi
+.RE
+.SS "I/O Statements"
+.PP
+The input/output statements are as follows:
+.PP
+.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n"
+.BI close( file )
+Close file (or pipe, see below).
+.TP
+.B getline
+Set
+.B $0
+from next input record; set
+.BR NF ,
+.BR NR ,
+.BR FNR .
+.TP
+.BI "getline <" file
+Set
+.B $0
+from next record of
+.IR file ;
+set
+.BR NF .
+.TP
+.BI getline " var"
+Set
+.I var
+from next input record; set
+.BR NR ,
+.BR FNR .
+.TP
+.BI getline " var" " <" file
+Set
+.I var
+from next record of
+.IR file .
+.TP
+.B next
+Stop processing the current input record. The next input record
+is read and processing starts over with the first pattern in the
+AWK program. If the end of the input data is reached, the
+.B END
+block(s), if any, are executed.
+.TP
+.B "nextfile"
+Stop processing the current input file. The next input record read
+comes from the next input file.
+.B FILENAME
+and
+.B ARGIND
+are updated,
+.B FNR
+is reset to 1, and processing starts over with the first pattern in the
+AWK program. If the end of the input data is reached, the
+.B END
+block(s), if any, are executed.
+.B NOTE:
+Earlier versions of gawk used
+.BR "next file" ,
+as two words. While this usage is still recognized, it generates a
+warning message and will eventually be removed.
+.TP
+.B print
+Prints the current record.
+The output record is terminated with the value of the
+.B ORS
+variable.
+.TP
+.BI print " expr-list"
+Prints expressions.
+Each expression is separated by the value of the
+.B OFS
+variable.
+The output record is terminated with the value of the
+.B ORS
+variable.
+.TP
+.BI print " expr-list" " >" file
+Prints expressions on
+.IR file .
+Each expression is separated by the value of the
+.B OFS
+variable. The output record is terminated with the value of the
+.B ORS
+variable.
+.TP
+.BI printf " fmt, expr-list"
+Format and print.
+.TP
+.BI printf " fmt, expr-list" " >" file
+Format and print on
+.IR file .
+.TP
+.BI system( cmd-line )
+Execute the command
+.IR cmd-line ,
+and return the exit status.
+(This may not be available on non-\*(PX systems.)
+.TP
+\&\fBfflush(\fR[\fIfile\^\fR]\fB)\fR
+Flush any buffers associated with the open output file or pipe
+.IR file .
+If
+.I file
+is missing, then standard output is flushed.
+If
+.I file
+is the null string,
+then all open output files and pipes
+have their buffers flushed.
+.PP
+Other input/output redirections are also allowed. For
+.B print
+and
+.BR printf ,
+.BI >> file
+appends output to the
+.IR file ,
+while
+.BI | " command"
+writes on a pipe.
+In a similar fashion,
+.IB command " | getline"
+pipes into
+.BR getline .
+The
+.BR getline
+command will return 0 on end of file, and \-1 on an error.
+.SS The \fIprintf\fP\^ Statement
+.PP
+The AWK versions of the
+.B printf
+statement and
+.B sprintf()
+function
+(see below)
+accept the following conversion specification formats:
+.TP
+.B %c
+An \s-1ASCII\s+1 character.
+If the argument used for
+.B %c
+is numeric, it is treated as a character and printed.
+Otherwise, the argument is assumed to be a string, and only the first
+character of that string is printed.
+.TP
+.PD 0
+.B %d
+.TP
+.PD
+.B %i
+A decimal number (the integer part).
+.TP
+.PD 0
+.B %e
+.TP
+.PD
+.B %E
+A floating point number of the form
+.BR [\-]d.dddddde[+\^\-]dd .
+The
+.B %E
+format uses
+.B E
+instead of
+.BR e .
+.TP
+.B %f
+A floating point number of the form
+.BR [\-]ddd.dddddd .
+.TP
+.PD 0
+.B %g
+.TP
+.PD
+.B %G
+Use
+.B %e
+or
+.B %f
+conversion, whichever is shorter, with nonsignificant zeros suppressed.
+The
+.B %G
+format uses
+.B %E
+instead of
+.BR %e .
+.TP
+.B %o
+An unsigned octal number (again, an integer).
+.TP
+.B %s
+A character string.
+.TP
+.PD 0
+.B %x
+.TP
+.PD
+.B %X
+An unsigned hexadecimal number (an integer).
+The
+.B %X
+format uses
+.B ABCDEF
+instead of
+.BR abcdef .
+.TP
+.B %%
+A single
+.B %
+character; no argument is converted.
+.PP
+There are optional, additional parameters that may lie between the
+.B %
+and the control letter:
+.TP
+.B \-
+The expression should be left-justified within its field.
+.TP
+.I space
+For numeric conversions, prefix positive values with a space, and
+negative values with a minus sign.
+.TP
+.B +
+The plus sign, used before the width modifier (see below),
+says to always supply a sign for numeric conversions, even if the data
+to be formatted is positive. The
+.B +
+overrides the space modifier.
+.TP
+.B #
+Use an ``alternate form'' for certain control letters.
+For
+.BR %o ,
+supply a leading zero.
+For
+.BR %x ,
+and
+.BR %X ,
+supply a leading
+.BR 0x
+or
+.BR 0X
+for
+a nonzero result.
+For
+.BR %e ,
+.BR %E ,
+and
+.BR %f ,
+the result will always contain a
+decimal point.
+For
+.BR %g ,
+and
+.BR %G ,
+trailing zeros are not removed from the result.
+.TP
+.B 0
+A leading
+.B 0
+(zero) acts as a flag that indicates output should be
+padded with zeroes instead of spaces.
+This applies even to non-numeric output formats.
+This flag only has an effect when the field width is wider than the
+value to be printed.
+.TP
+.I width
+The field should be padded to this width. The field is normally padded
+with spaces. If the
+.B 0
+flag has been used, it is padded with zeroes.
+.TP
+.BI \&. prec
+A number that specifies the precision to use when printing.
+For the
+.BR %e ,
+.BR %E ,
+and
+.BR %f
+formats, this specifies the
+number of digits you want printed to the right of the decimal point.
+For the
+.BR %g ,
+and
+.B %G
+formats, it specifies the maximum number
+of significant digits. For the
+.BR %d ,
+.BR %o ,
+.BR %i ,
+.BR %u ,
+.BR %x ,
+and
+.B %X
+formats, it specifies the minimum number of
+digits to print. For a string, it specifies the maximum number of
+characters from the string that should be printed.
+.PP
+The dynamic
+.I width
+and
+.I prec
+capabilities of the \*(AN C
+.B printf()
+routines are supported.
+A
+.B *
+in place of either the
+.B width
+or
+.B prec
+specifications will cause their values to be taken from
+the argument list to
+.B printf
+or
+.BR sprintf() .
+.SS Special File Names
+.PP
+When doing I/O redirection from either
+.B print
+or
+.B printf
+into a file,
+or via
+.B getline
+from a file,
+.I gawk
+recognizes certain special filenames internally. These filenames
+allow access to open file descriptors inherited from
+.IR gawk 's
+parent process (usually the shell).
+Other special filenames provide access to information about the running
+.B gawk
+process.
+The filenames are:
+.TP \w'\fB/dev/stdout\fR'u+1n
+.B /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+.TP
+.B /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+.TP
+.B /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+.TP
+.B /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with spaces.
+.B $1
+is the value of the
+.IR getuid (2)
+system call,
+.B $2
+is the value of the
+.IR geteuid (2)
+system call,
+.B $3
+is the value of the
+.IR getgid (2)
+system call, and
+.B $4
+is the value of the
+.IR getegid (2)
+system call.
+If there are any additional fields, they are the group IDs returned by
+.IR getgroups (2).
+Multiple groups may not be supported on all systems.
+.TP
+.B /dev/stdin
+The standard input.
+.TP
+.B /dev/stdout
+The standard output.
+.TP
+.B /dev/stderr
+The standard error output.
+.TP
+.BI /dev/fd/\^ n
+The file associated with the open file descriptor
+.IR n .
+.PP
+These are particularly useful for error messages. For example:
+.PP
+.RS
+.ft B
+print "You blew it!" > "/dev/stderr"
+.ft R
+.RE
+.PP
+whereas you would otherwise have to use
+.PP
+.RS
+.ft B
+print "You blew it!" | "cat 1>&2"
+.ft R
+.RE
+.PP
+These file names may also be used on the command line to name data files.
+.SS Numeric Functions
+.PP
+AWK has the following pre-defined arithmetic functions:
+.PP
+.TP \w'\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR'u+1n
+.BI atan2( y , " x" )
+returns the arctangent of
+.I y/x
+in radians.
+.TP
+.BI cos( expr )
+returns the cosine of
+.IR expr ,
+which is in radians.
+.TP
+.BI exp( expr )
+the exponential function.
+.TP
+.BI int( expr )
+truncates to integer.
+.TP
+.BI log( expr )
+the natural logarithm function.
+.TP
+.B rand()
+returns a random number between 0 and 1.
+.TP
+.BI sin( expr )
+returns the sine of
+.IR expr ,
+which is in radians.
+.TP
+.BI sqrt( expr )
+the square root function.
+.TP
+\&\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR
+uses
+.I expr
+as a new seed for the random number generator. If no
+.I expr
+is provided, the time of day will be used.
+The return value is the previous seed for the random
+number generator.
+.SS String Functions
+.PP
+.I Gawk
+has the following pre-defined string functions:
+.PP
+.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
+\fBgensub(\fIr\fB, \fIs\fB, \fIh \fR[\fB, \fIt\fR]\fB)\fR
+search the target string
+.I t
+for matches of the regular expression
+.IR r .
+If
+.I h
+is a string beginning with
+.B g
+or
+.BR G ,
+then replace all matches of
+.I r
+with
+.IR s .
+Otherwise,
+.I h
+is a number indicating which match of
+.I r
+to replace.
+If no
+.I t
+is supplied,
+.B $0
+is used instead.
+Within the replacement text
+.IR s ,
+the sequence
+.BI \e n\fR,
+where
+.I n
+is a digit from 1 to 9, may be used to indicate just the text that
+matched the
+.IR n 'th
+parenthesized subexpression. The sequence
+.B \e0
+represents the entire matched text, as does the character
+.BR & .
+Unlike
+.B sub()
+and
+.BR gsub() ,
+the modified string is returned as the result of the function,
+and the original target string is
+.I not
+changed.
+.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
+\fBgsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR
+for each substring matching the regular expression
+.I r
+in the string
+.IR t ,
+substitute the string
+.IR s ,
+and return the number of substitutions.
+If
+.I t
+is not supplied, use
+.BR $0 .
+An
+.B &
+in the replacement text is replaced with the text that was actually matched.
+Use
+.B \e&
+to get a literal
+.BR & .
+See
+.I "AWK Language Programming"
+for a fuller discussion of the rules for
+.BR &'s
+and backslashes in the replacement text of
+.BR sub() ,
+.BR gsub() ,
+and
+.BR gensub() .
+.TP
+.BI index( s , " t" )
+returns the index of the string
+.I t
+in the string
+.IR s ,
+or 0 if
+.I t
+is not present.
+.TP
+\fBlength(\fR[\fIs\fR]\fB)\fR
+returns the length of the string
+.IR s ,
+or the length of
+.B $0
+if
+.I s
+is not supplied.
+.TP
+.BI match( s , " r" )
+returns the position in
+.I s
+where the regular expression
+.I r
+occurs, or 0 if
+.I r
+is not present, and sets the values of
+.B RSTART
+and
+.BR RLENGTH .
+.TP
+\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR]\fB)\fR
+splits the string
+.I s
+into the array
+.I a
+on the regular expression
+.IR r ,
+and returns the number of fields. If
+.I r
+is omitted,
+.B FS
+is used instead.
+The array
+.I a
+is cleared first.
+Splitting behaves identically to field splitting, described above.
+.TP
+.BI sprintf( fmt , " expr-list" )
+prints
+.I expr-list
+according to
+.IR fmt ,
+and returns the resulting string.
+.TP
+\fBsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR
+just like
+.BR gsub() ,
+but only the first matching substring is replaced.
+.TP
+\fBsubstr(\fIs\fB, \fIi \fR[\fB, \fIn\fR]\fB)\fR
+returns the at most
+.IR n -character
+substring of
+.I s
+starting at
+.IR i .
+If
+.I n
+is omitted, the rest of
+.I s
+is used.
+.TP
+.BI tolower( str )
+returns a copy of the string
+.IR str ,
+with all the upper-case characters in
+.I str
+translated to their corresponding lower-case counterparts.
+Non-alphabetic characters are left unchanged.
+.TP
+.BI toupper( str )
+returns a copy of the string
+.IR str ,
+with all the lower-case characters in
+.I str
+translated to their corresponding upper-case counterparts.
+Non-alphabetic characters are left unchanged.
+.SS Time Functions
+.PP
+Since one of the primary uses of AWK programs is processing log files
+that contain time stamp information,
+.I gawk
+provides the following two functions for obtaining time stamps and
+formatting them.
+.PP
+.TP "\w'\fBsystime()\fR'u+1n"
+.B systime()
+returns the current time of day as the number of seconds since the Epoch
+(Midnight UTC, January 1, 1970 on \*(PX systems).
+.TP
+\fBstrftime(\fR[\fIformat \fR[\fB, \fItimestamp\fR]]\fB)\fR
+formats
+.I timestamp
+according to the specification in
+.IR format .
+The
+.I timestamp
+should be of the same form as returned by
+.BR systime() .
+If
+.I timestamp
+is missing, the current time of day is used.
+If
+.I format
+is missing, a default format equivalent to the output of
+.IR date (1)
+will be used.
+See the specification for the
+.B strftime()
+function in \*(AN C for the format conversions that are
+guaranteed to be available.
+A public-domain version of
+.IR strftime (3)
+and a man page for it come with
+.IR gawk ;
+if that version was used to build
+.IR gawk ,
+then all of the conversions described in that man page are available to
+.IR gawk .
+.SS String Constants
+.PP
+String constants in AWK are sequences of characters enclosed
+between double quotes (\fB"\fR). Within strings, certain
+.I "escape sequences"
+are recognized, as in C. These are:
+.PP
+.TP \w'\fB\e\^\fIddd\fR'u+1n
+.B \e\e
+A literal backslash.
+.TP
+.B \ea
+The ``alert'' character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character.
+.TP
+.B \eb
+backspace.
+.TP
+.B \ef
+form-feed.
+.TP
+.B \en
+newline.
+.TP
+.B \er
+carriage return.
+.TP
+.B \et
+horizontal tab.
+.TP
+.B \ev
+vertical tab.
+.TP
+.BI \ex "\^hex digits"
+The character represented by the string of hexadecimal digits following
+the
+.BR \ex .
+As in \*(AN C, all following hexadecimal digits are considered part of
+the escape sequence.
+(This feature should tell us something about language design by committee.)
+E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
+.BI \e ddd
+The character represented by the 1-, 2-, or 3-digit sequence of octal
+digits. E.g. \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
+.BI \e c
+The literal character
+.IR c\^ .
+.PP
+The escape sequences may also be used inside constant regular expressions
+(e.g.,
+.B "/[\ \et\ef\en\er\ev]/"
+matches whitespace characters).
+.PP
+In compatibility mode, the characters represented by octal and
+hexadecimal escape sequences are treated literally when used in
+regexp constants. Thus,
+.B /a\e52b/
+is equivalent to
+.BR /a\e*b/ .
+.SH FUNCTIONS
+Functions in AWK are defined as follows:
+.PP
+.RS
+\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR
+.RE
+.PP
+Functions are executed when they are called from within expressions
+in either patterns or actions. Actual parameters supplied in the function
+call are used to instantiate the formal parameters declared in the function.
+Arrays are passed by reference, other variables are passed by value.
+.PP
+Since functions were not originally part of the AWK language, the provision
+for local variables is rather clumsy: They are declared as extra parameters
+in the parameter list. The convention is to separate local variables from
+real parameters by extra spaces in the parameter list. For example:
+.PP
+.RS
+.ft B
+.nf
+function f(p, q, a, b) # a & b are local
+{
+ \&.....
+}
+
+/abc/ { ... ; f(1, 2) ; ... }
+.fi
+.ft R
+.RE
+.PP
+The left parenthesis in a function call is required
+to immediately follow the function name,
+without any intervening white space.
+This is to avoid a syntactic ambiguity with the concatenation operator.
+This restriction does not apply to the built-in functions listed above.
+.PP
+Functions may call each other and may be recursive.
+Function parameters used as local variables are initialized
+to the null string and the number zero upon function invocation.
+.PP
+If
+.B \-\^\-lint
+has been provided,
+.I gawk
+will warn about calls to undefined functions at parse time,
+instead of at run time.
+Calling an undefined function at run time is a fatal error.
+.PP
+The word
+.B func
+may be used in place of
+.BR function .
+.SH EXAMPLES
+.nf
+Print and sort the login names of all users:
+
+.ft B
+ BEGIN { FS = ":" }
+ { print $1 | "sort" }
+
+.ft R
+Count lines in a file:
+
+.ft B
+ { nlines++ }
+ END { print nlines }
+
+.ft R
+Precede each line by its number in the file:
+
+.ft B
+ { print FNR, $0 }
+
+.ft R
+Concatenate and line number (a variation on a theme):
+
+.ft B
+ { print NR, $0 }
+.ft R
+.fi
+.SH SEE ALSO
+.IR egrep (1),
+.IR getpid (2),
+.IR getppid (2),
+.IR getpgrp (2),
+.IR getuid (2),
+.IR geteuid (2),
+.IR getgid (2),
+.IR getegid (2),
+.IR getgroups (2)
+.PP
+.IR "The AWK Programming Language" ,
+Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
+Addison-Wesley, 1988. ISBN 0-201-07981-X.
+.PP
+.IR "AWK Language Programming" ,
+Edition 1.0, published by the Free Software Foundation, 1995.
+.SH POSIX COMPATIBILITY
+A primary goal for
+.I gawk
+is compatibility with the \*(PX standard, as well as with the
+latest version of \*(UX
+.IR awk .
+To this end,
+.I gawk
+incorporates the following user visible
+features which are not described in the AWK book,
+but are part of the Bell Labs version of
+.IR awk ,
+and are in the \*(PX standard.
+.PP
+The
+.B \-v
+option for assigning variables before program execution starts is new.
+The book indicates that command line variable assignment happens when
+.I awk
+would otherwise open the argument as a file, which is after the
+.B BEGIN
+block is executed. However, in earlier implementations, when such an
+assignment appeared before any file names, the assignment would happen
+.I before
+the
+.B BEGIN
+block was run. Applications came to depend on this ``feature.''
+When
+.I awk
+was changed to match its documentation, this option was added to
+accommodate applications that depended upon the old behavior.
+(This feature was agreed upon by both the AT&T and GNU developers.)
+.PP
+The
+.B \-W
+option for implementation specific features is from the \*(PX standard.
+.PP
+When processing arguments,
+.I gawk
+uses the special option ``\fB\-\^\-\fP'' to signal the end of
+arguments.
+In compatibility mode, it will warn about, but otherwise ignore,
+undefined options.
+In normal operation, such arguments are passed on to the AWK program for
+it to process.
+.PP
+The AWK book does not define the return value of
+.BR srand() .
+The \*(PX standard
+has it return the seed it was using, to allow keeping track
+of random number sequences. Therefore
+.B srand()
+in
+.I gawk
+also returns its current seed.
+.PP
+Other new features are:
+The use of multiple
+.B \-f
+options (from MKS
+.IR awk );
+the
+.B ENVIRON
+array; the
+.BR \ea ,
+and
+.BR \ev
+escape sequences (done originally in
+.I gawk
+and fed back into AT&T's); the
+.B tolower()
+and
+.B toupper()
+built-in functions (from AT&T); and the \*(AN C conversion specifications in
+.B printf
+(done first in AT&T's version).
+.SH GNU EXTENSIONS
+.I Gawk
+has a number of extensions to \*(PX
+.IR awk .
+They are described in this section. All the extensions described here
+can be disabled by
+invoking
+.I gawk
+with the
+.B \-\^\-traditional
+option.
+.PP
+The following features of
+.I gawk
+are not available in
+\*(PX
+.IR awk .
+.RS
+.TP \w'\(bu'u+1n
+\(bu
+The
+.B \ex
+escape sequence.
+(Disabled with
+.BR \-\^\-posix .)
+.TP \w'\(bu'u+1n
+\(bu
+The
+.B fflush()
+function.
+(Disabled with
+.BR \-\^\-posix .)
+.TP
+\(bu
+The
+.BR systime() ,
+.BR strftime() ,
+and
+.B gensub()
+functions.
+.TP
+\(bu
+The special file names available for I/O redirection are not recognized.
+.TP
+\(bu
+The
+.BR ARGIND ,
+.BR ERRNO ,
+and
+.B RT
+variables are not special.
+.TP
+\(bu
+The
+.B IGNORECASE
+variable and its side-effects are not available.
+.TP
+\(bu
+The
+.B FIELDWIDTHS
+variable and fixed-width field splitting.
+.TP
+\(bu
+The use of
+.B RS
+as a regular expression.
+.TP
+\(bu
+The ability to split out individual characters using the null string
+as the value of
+.BR FS ,
+and as the third argument to
+.BR split() .
+.TP
+\(bu
+No path search is performed for files named via the
+.B \-f
+option. Therefore the
+.B AWKPATH
+environment variable is not special.
+.TP
+\(bu
+The use of
+.B "nextfile"
+to abandon processing of the current input file.
+.TP
+\(bu
+The use of
+.BI delete " array"
+to delete the entire contents of an array.
+.RE
+.PP
+The AWK book does not define the return value of the
+.B close()
+function.
+.IR Gawk\^ 's
+.B close()
+returns the value from
+.IR fclose (3),
+or
+.IR pclose (3),
+when closing a file or pipe, respectively.
+.PP
+When
+.I gawk
+is invoked with the
+.B \-\^\-traditional
+option,
+if the
+.I fs
+argument to the
+.B \-F
+option is ``t'', then
+.B FS
+will be set to the tab character.
+Note that typing
+.B "gawk \-F\et \&..."
+simply causes the shell to quote the ``t'', and does not pass
+``\et'' to the
+.B \-F
+option.
+Since this is a rather ugly special case, it is not the default behavior.
+This behavior also does not occur if
+.B \-\^\-posix
+has been specified.
+To really get a tab character as the field separator, it is best to use
+quotes:
+.BR "gawk \-F'\et' \&..." .
+.ig
+.PP
+If
+.I gawk
+was compiled for debugging, it will
+accept the following additional options:
+.TP
+.PD 0
+.B \-Wparsedebug
+.TP
+.PD
+.B \-\^\-parsedebug
+Turn on
+.IR yacc (1)
+or
+.IR bison (1)
+debugging output during program parsing.
+This option should only be of interest to the
+.I gawk
+maintainers, and may not even be compiled into
+.IR gawk .
+..
+.SH HISTORICAL FEATURES
+There are two features of historical AWK implementations that
+.I gawk
+supports.
+First, it is possible to call the
+.B length()
+built-in function not only with no argument, but even without parentheses!
+Thus,
+.RS
+.PP
+.ft B
+a = length # Holy Algol 60, Batman!
+.ft R
+.RE
+.PP
+is the same as either of
+.RS
+.PP
+.ft B
+a = length()
+.br
+a = length($0)
+.ft R
+.RE
+.PP
+This feature is marked as ``deprecated'' in the \*(PX standard, and
+.I gawk
+will issue a warning about its use if
+.B \-\^\-lint
+is specified on the command line.
+.PP
+The other feature is the use of either the
+.B continue
+or the
+.B break
+statements outside the body of a
+.BR while ,
+.BR for ,
+or
+.B do
+loop. Traditional AWK implementations have treated such usage as
+equivalent to the
+.B next
+statement.
+.I Gawk
+will support this usage if
+.B \-\^\-traditional
+has been specified.
+.SH ENVIRONMENT VARIABLES
+If
+.B POSIXLY_CORRECT
+exists in the environment, then
+.I gawk
+behaves exactly as if
+.B \-\^\-posix
+had been specified on the command line.
+If
+.B \-\^\-lint
+has been specified,
+.I gawk
+will issue a warning message to this effect.
+.PP
+The
+.B AWKPATH
+environment variable can be used to provide a list of directories that
+.I gawk
+will search when looking for files named via the
+.B \-f
+and
+.B \-\^\-file
+options.
+.SH BUGS
+The
+.B \-F
+option is not necessary given the command line variable assignment feature;
+it remains only for backwards compatibility.
+.PP
+If your system actually has support for
+.B /dev/fd
+and the associated
+.BR /dev/stdin ,
+.BR /dev/stdout ,
+and
+.B /dev/stderr
+files, you may get different output from
+.I gawk
+than you would get on a system without those files. When
+.I gawk
+interprets these files internally, it synchronizes output to the standard
+output with output to
+.BR /dev/stdout ,
+while on a system with those files, the output is actually to different
+open files.
+Caveat Emptor.
+.PP
+Syntactically invalid single character programs tend to overflow
+the parse stack, generating a rather unhelpful message. Such programs
+are surprisingly difficult to diagnose in the completely general case,
+and the effort to do so really is not worth it.
+.SH VERSION INFORMATION
+This man page documents
+.IR gawk ,
+version 3.0.2.
+.SH AUTHORS
+The original version of \*(UX
+.I awk
+was designed and implemented by Alfred Aho,
+Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan
+continues to maintain and enhance it.
+.PP
+Paul Rubin and Jay Fenlason,
+of the Free Software Foundation, wrote
+.IR gawk ,
+to be compatible with the original version of
+.I awk
+distributed in Seventh Edition \*(UX.
+John Woods contributed a number of bug fixes.
+David Trueman, with contributions
+from Arnold Robbins, made
+.I gawk
+compatible with the new version of \*(UX
+.IR awk .
+Arnold Robbins is the current maintainer.
+.PP
+The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
+Scott Deifik is the current DOS maintainer. Pat Rankin did the
+port to VMS, and Michal Jaegermann did the port to the Atari ST.
+The port to OS/2 was done by Kai Uwe Rommel, with contributions and
+help from Darrel Hankerson. Fred Fish supplied support for the Amiga.
+.SH BUG REPORTS
+If you find a bug in
+.IR gawk ,
+please send electronic mail to
+.BR bug-gnu-utils@prep.ai.mit.edu ,
+.I with
+a carbon copy to
+.BR arnold@gnu.ai.mit.edu .
+Please include your operating system and its revision, the version of
+.IR gawk ,
+what C compiler you used to compile it, and a test program
+and data that are as small as possible for reproducing the problem.
+.PP
+Before sending a bug report, please do two things. First, verify that
+you have the latest version of
+.IR gawk .
+Many bugs (usually subtle ones) are fixed at each release, and if
+yours is out of date, the problem may already have been solved.
+Second, please read this man page and the reference manual carefully to
+be sure that what you think is a bug really is, instead of just a quirk
+in the language.
+.PP
+Whatever you do, do
+.B NOT
+post a bug report in
+.BR comp.lang.awk .
+While the
+.I gawk
+developers occasionally read this newsgroup, posting bug reports there
+is an unreliable way to report bugs. Instead, please use the electronic mail
+addresses given above.
+.SH ACKNOWLEDGEMENTS
+Brian Kernighan of Bell Labs
+provided valuable assistance during testing and debugging.
+We thank him.
+.SH COPYING PERMISSIONS
+Copyright \(co 1996 Free Software Foundation, Inc.
+.PP
+Permission is granted to make and distribute verbatim copies of
+this manual page provided the copyright notice and this permission
+notice are preserved on all copies.
+.ig
+Permission is granted to process this file through troff and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual page).
+..
+.PP
+Permission is granted to copy and distribute modified versions of this
+manual page under the conditions for verbatim copying, provided that
+the entire resulting derived work is distributed under the terms of a
+permission notice identical to this one.
+.PP
+Permission is granted to copy and distribute translations of this
+manual page into another language, under the above conditions for
+modified versions, except that this permission notice may be stated in
+a translation approved by the Foundation.
diff --git a/contrib/awk/doc/gawk.texi b/contrib/awk/doc/gawk.texi
new file mode 100644
index 0000000..8c2aad2
--- /dev/null
+++ b/contrib/awk/doc/gawk.texi
@@ -0,0 +1,20820 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header (This is for running Texinfo on a region.)
+@setfilename gawk.info
+@settitle The GNU Awk User's Guide
+@c %**end of header (This is for running Texinfo on a region.)
+
+@c inside ifinfo for older versions of texinfo.tex
+@ifinfo
+@c I hope this is the right category
+@dircategory Programming Languages
+@direntry
+* Gawk: (gawk.info). A Text Scanning and Processing Language.
+@end direntry
+@end ifinfo
+
+@c @set xref-automatic-section-title
+@c @set DRAFT
+
+@c The following information should be updated here only!
+@c This sets the edition of the document, the version of gawk it
+@c applies to, and when the document was updated.
+@set TITLE Effective AWK Programming
+@set SUBTITLE A User's Guide for GNU Awk
+@set PATCHLEVEL 3
+@set EDITION 1.0.@value{PATCHLEVEL}
+@set VERSION 3.0
+@set UPDATE-MONTH February 1997
+@iftex
+@set DOCUMENT book
+@end iftex
+@ifinfo
+@set DOCUMENT Info file
+@end ifinfo
+
+@ignore
+Some comments on the layout for TeX.
+1. Use at least texinfo.tex 2.159. It contains fixes that
+ are needed to get the footings for draft mode to not appear.
+2. I have done A LOT of work to make this look good. There are `@page' commands
+ and use of `@group ... @end group' in a number of places. If you muck
+ with anything, it's your responsibility not to break the layout.
+@end ignore
+
+@c merge the function and variable indexes into the concept index
+@ifinfo
+@synindex fn cp
+@synindex vr cp
+@end ifinfo
+@iftex
+@syncodeindex fn cp
+@syncodeindex vr cp
+@end iftex
+
+@c If "finalout" is commented out, the printed output will show
+@c black boxes that mark lines that are too long. Thus, it is
+@c unwise to comment it out when running a master in case there are
+@c overfulls which are deemed okay.
+
+@ifclear DRAFT
+@iftex
+@finalout
+@end iftex
+@end ifclear
+
+@smallbook
+@iftex
+@c @cropmarks
+@end iftex
+
+@ifinfo
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+This is Edition @value{EDITION} of @cite{@value{TITLE}},
+for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation of AWK.
+
+Copyright (C) 1989, 1991, 92, 93, 96, 97 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+
+@end ignore
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end ifinfo
+
+@setchapternewpage odd
+
+@titlepage
+@title @value{TITLE}
+@subtitle @value{SUBTITLE}
+@subtitle Edition @value{EDITION}
+@subtitle @value{UPDATE-MONTH}
+@author Arnold D. Robbins
+@ignore
+@sp 1
+@author Based on @cite{The GAWK Manual},
+@author by Robbins, Close, Rubin, and Stallman
+@end ignore
+
+@c Include the Distribution inside the titlepage environment so
+@c that headings are turned off. Headings on and off do not work.
+
+@page
+@vskip 0pt plus 1filll
+@ifset LEGALJUNK
+The programs and applications presented in this book have been
+included for their instructional value. They have been tested with care,
+but are not guaranteed for any particular purpose. The publisher does not
+offer any warranties or representations, nor does it accept any
+liabilities with respect to the programs or applications.
+So there.
+@sp 2
+UNIX is a registered trademark of X/Open, Ltd. @*
+Microsoft, MS, and MS-DOS are registered trademarks, and Windows is a
+trademark of Microsoft Corporation in the United States and other
+countries. @*
+Atari, 520ST, 1040ST, TT, STE, Mega, and Falcon are registered trademarks
+or trademarks of Atari Corporation. @*
+DEC, Digital, OpenVMS, ULTRIX, and VMS, are trademarks of Digital Equipment
+Corporation. @*
+@end ifset
+``To boldly go where no man has gone before'' is a
+Registered Trademark of Paramount Pictures Corporation. @*
+@c sorry, i couldn't resist
+@sp 3
+Copyright @copyright{} 1989, 1991, 92, 93, 96, 97 Free Software Foundation, Inc.
+@sp 2
+
+This is Edition @value{EDITION} of @cite{@value{TITLE}}, @*
+for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU implementation of AWK.
+
+@sp 2
+@center Published jointly by:
+
+@multitable {Specialized Systems Consultants, Inc. (SSC)} {Boston, MA 02111-1307 USA}
+@item Specialized Systems Consultants, Inc. (SSC) @tab Free Software Foundation
+@item PO Box 55549 @tab 59 Temple Place --- Suite 330
+@item Seattle, WA 98155 USA @tab Boston, MA 02111-1307 USA
+@item Phone: +1-206-782-7733 @tab Phone: +1-617-542-5942
+@item Fax: +1-206-782-7191 @tab Fax: +1-617-542-2652
+@item E-mail: @code{sales@@ssc.com} @tab E-mail: @code{gnu@@prep.ai.mit.edu}
+@item URL: @code{http://www.ssc.com/} @tab URL: @code{http://www.fsf.org/}
+@end multitable
+
+@sp 1
+@c this ISBN can change! Check with SSC
+@c This one is correct for gawk 3.0 and edition 1.0 from the FSF
+@c ISBN 1-882114-26-4 @*
+@c This one is correct for gawk 3.0.3 and edition 1.0.3 from SSC
+ISBN 1-57831-000-8 @*
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@sp 2
+@c Cover art by Etienne Suvasa.
+Cover art by Amy Wells Wood.
+@end titlepage
+
+@c Thanks to Bob Chassell for directions on doing dedications.
+@iftex
+@headings off
+@page
+@w{ }
+@sp 9
+@center @i{To Miriam, for making me complete.}
+@sp 1
+@center @i{To Chana, for the joy you bring us.}
+@sp 1
+@center @i{To Rivka, for the exponential increase.}
+@sp 1
+@center @i{To Nachum, for the added dimension.}
+@page
+@w{ }
+@page
+@headings on
+@end iftex
+
+@iftex
+@headings off
+@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @|
+@oddheading @| @| @strong{@thischapter}@ @ @ @thispage
+@ifset DRAFT
+@evenfooting @today{} @| @emph{DRAFT!} @| Please Do Not Redistribute
+@oddfooting Please Do Not Redistribute @| @emph{DRAFT!} @| @today{}
+@end ifset
+@end iftex
+
+@ifinfo
+@node Top, Preface, (dir), (dir)
+@top General Introduction
+@c Preface or Licensing nodes should come right after the Top
+@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
+
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+This is Edition @value{EDITION} of @cite{@value{TITLE}}, @*
+for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation @*
+of AWK.
+
+@end ifinfo
+
+@menu
+* Preface:: What this @value{DOCUMENT} is about; brief
+ history and acknowledgements.
+* What Is Awk:: What is the @code{awk} language; using this
+ @value{DOCUMENT}.
+* Getting Started:: A basic introduction to using @code{awk}. How
+ to run an @code{awk} program. Command line
+ syntax.
+* One-liners:: Short, sample @code{awk} programs.
+* Regexp:: All about matching things using regular
+ expressions.
+* Reading Files:: How to read files and manipulate fields.
+* Printing:: How to print using @code{awk}. Describes the
+ @code{print} and @code{printf} statements.
+ Also describes redirection of output.
+* Expressions:: Expressions are the basic building blocks of
+ statements.
+* Patterns and Actions:: Overviews of patterns and actions.
+* Statements:: The various control statements are described
+ in detail.
+* Built-in Variables:: Built-in Variables
+* Arrays:: The description and use of arrays. Also
+ includes array-oriented control statements.
+* Built-in:: The built-in functions are summarized here.
+* User-defined:: User-defined functions are described in
+ detail.
+* Invoking Gawk:: How to run @code{gawk}.
+* Library Functions:: A Library of @code{awk} Functions.
+* Sample Programs:: Many @code{awk} programs with complete
+ explanations.
+* Language History:: The evolution of the @code{awk} language.
+* Gawk Summary:: @code{gawk} Options and Language Summary.
+* Installation:: Installing @code{gawk} under various operating
+ systems.
+* Notes:: Something about the implementation of
+ @code{gawk}.
+* Glossary:: An explanation of some unfamiliar terms.
+* Copying:: Your right to copy and distribute @code{gawk}.
+* Index:: Concept and Variable Index.
+
+* History:: The history of @code{gawk} and @code{awk}.
+* Manual History:: Brief history of the GNU project and this
+ @value{DOCUMENT}.
+* Acknowledgements:: Acknowledgements.
+* This Manual:: Using this @value{DOCUMENT}. Includes sample
+ input files that you can use.
+* Conventions:: Typographical Conventions.
+* Sample Data Files:: Sample data files for use in the @code{awk}
+ programs illustrated in this @value{DOCUMENT}.
+* Names:: What name to use to find @code{awk}.
+* Running gawk:: How to run @code{gawk} programs; includes
+ command line syntax.
+* One-shot:: Running a short throw-away @code{awk} program.
+* Read Terminal:: Using no input files (input from terminal
+ instead).
+* Long:: Putting permanent @code{awk} programs in
+ files.
+* Executable Scripts:: Making self-contained @code{awk} programs.
+* Comments:: Adding documentation to @code{gawk} programs.
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example with two rules.
+* More Complex:: A more complex example.
+* Statements/Lines:: Subdividing or combining statements into
+ lines.
+* Other Features:: Other Features of @code{awk}.
+* When:: When to use @code{gawk} and when to use other
+ things.
+* Regexp Usage:: How to Use Regular Expressions.
+* Escape Sequences:: How to write non-printing characters.
+* Regexp Operators:: Regular Expression Operators.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Leftmost Longest:: How much text matches.
+* Computed Regexps:: Using Dynamic Regexps.
+* Records:: Controlling how data is split into records.
+* Fields:: An introduction to fields.
+* Non-Constant Fields:: Non-constant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change it.
+* Basic Field Splitting:: How fields are split with single characters or
+ simple strings.
+* Regexp Field Splitting:: Using regexps as the field separator.
+* Single Character Fields:: Making each character a separate field.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
+* Field Splitting Summary:: Some final points and a summary table.
+* Constant Size:: Reading constant width data.
+* Multiple Line:: Reading multi-line records.
+* Getline:: Reading files under explicit program control
+ using the @code{getline} function.
+* Getline Intro:: Introduction to the @code{getline} function.
+* Plain Getline:: Using @code{getline} with no arguments.
+* Getline/Variable:: Using @code{getline} into a variable.
+* Getline/File:: Using @code{getline} from a file.
+* Getline/Variable/File:: Using @code{getline} into a variable from a
+ file.
+* Getline/Pipe:: Using @code{getline} from a pipe.
+* Getline/Variable/Pipe:: Using @code{getline} into a variable from a
+ pipe.
+* Getline Summary:: Summary Of @code{getline} Variants.
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print} statements.
+* Output Separators:: The output separators and how to change them.
+* OFMT:: Controlling Numeric Output With @code{print}.
+* Printf:: The @code{printf} statement.
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
+* Redirection:: How to redirect output to multiple files and
+ pipes.
+* Special Files:: File name interpretation in @code{gawk}.
+ @code{gawk} allows access to inherited file
+ descriptors.
+* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Constants:: String, numeric, and regexp constants.
+* Scalar Constants:: Numeric and string constants.
+* Regexp Constants:: Regular Expression constants.
+* Using Constant Regexps:: When and how to use a regexp constant.
+* Variables:: Variables give names to values for later use.
+* Using Variables:: Using variables in your programs.
+* Assignment Options:: Setting variables on the command line and a
+ summary of command line syntax. This is an
+ advanced method of input.
+* Conversion:: The conversion of strings to numbers and vice
+ versa.
+* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-},
+ etc.)
+* Concatenation:: Concatenating strings.
+* Assignment Ops:: Changing the value of a variable or a field.
+* Increment Ops:: Incrementing the numeric value of a variable.
+* Truth Values:: What is ``true'' and what is ``false''.
+* Typing and Comparison:: How variables acquire types, and how this
+ affects comparison of numbers and strings with
+ @samp{<}, etc.
+* Boolean Ops:: Combining comparison expressions using boolean
+ operators @samp{||} (``or''), @samp{&&}
+ (``and'') and @samp{!} (``not'').
+* Conditional Exp:: Conditional expressions select between two
+ subexpressions under control of a third
+ subexpression.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
+* Pattern Overview:: What goes into a pattern.
+* Kinds of Patterns:: A list of all kinds of patterns.
+* Regexp Patterns:: Using regexps as patterns.
+* Expression Patterns:: Any expression can be used as a pattern.
+* Ranges:: Pairs of patterns specify record ranges.
+* BEGIN/END:: Specifying initialization and cleanup rules.
+* Using BEGIN/END:: How and why to use BEGIN/END rules.
+* I/O And BEGIN/END:: I/O issues in BEGIN/END rules.
+* Empty:: The empty pattern, which matches every record.
+* Action Overview:: What goes into an action.
+* If Statement:: Conditionally execute some @code{awk}
+ statements.
+* While Statement:: Loop until some condition is satisfied.
+* Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+* For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+* Break Statement:: Immediately exit the innermost enclosing loop.
+* Continue Statement:: Skip to the end of the innermost enclosing
+ loop.
+* Next Statement:: Stop processing the current input record.
+* Nextfile Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @code{awk}.
+* User-modified:: Built-in variables that you change to control
+ @code{awk}.
+* Auto-set:: Built-in variables where @code{awk} gives you
+ information.
+* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}.
+* Array Intro:: Introduction to Arrays
+* Reference to Elements:: How to examine one element of an array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example:: Basic Example of an Array
+* Scanning an Array:: A variation of the @code{for} statement. It
+ loops through the indices of an array's
+ existing elements.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
+* Numeric Array Subscripts:: How to use numbers as subscripts in
+ @code{awk}.
+* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Multi-dimensional:: Emulating multi-dimensional arrays in
+ @code{awk}.
+* Multi-scanning:: Scanning multi-dimensional arrays.
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers, including
+ @code{int}, @code{sin} and @code{rand}.
+* String Functions:: Functions for string manipulation, such as
+ @code{split}, @code{match}, and
+ @code{sprintf}.
+* I/O Functions:: Functions for files and shell commands.
+* Time Functions:: Functions for dealing with time stamps.
+* Definition Syntax:: How to write definitions and what they mean.
+* Function Example:: An example function definition and what it
+ does.
+* Function Caveats:: Things to watch out for.
+* Return Statement:: Specifying the value a function returns.
+* Options:: Command line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+* Known Bugs:: Known Bugs in @code{gawk}.
+* Portability Notes:: What to do if you don't have @code{gawk}.
+* Nextfile Function:: Two implementations of a @code{nextfile}
+ function.
+* Assert Function:: A function for assertions in @code{awk}
+ programs.
+* Round Function:: A function for rounding if @code{sprintf} does
+ not do it correctly.
+* Ordinal Functions:: Functions for using characters as numbers and
+ vice versa.
+* Join Function:: A function to join an array into a string.
+* Mktime Function:: A function to turn a date into a timestamp.
+* Gettimeofday Function:: A function to get formatted times.
+* Filetrans Function:: A function for handling data file transitions.
+* Getopt Function:: A function for processing command line
+ arguments.
+* Passwd Functions:: Functions for getting user information.
+* Group Functions:: Functions for getting group information.
+* Library Names:: How to best name private global variables in
+ library functions.
+* Clones:: Clones of common utilities.
+* Cut Program:: The @code{cut} utility.
+* Egrep Program:: The @code{egrep} utility.
+* Id Program:: The @code{id} utility.
+* Split Program:: The @code{split} utility.
+* Tee Program:: The @code{tee} utility.
+* Uniq Program:: The @code{uniq} utility.
+* Wc Program:: The @code{wc} utility.
+* Miscellaneous Programs:: Some interesting @code{awk} programs.
+* Dupword Program:: Finding duplicated words in a document.
+* Alarm Program:: An alarm clock.
+* Translate Program:: A program similar to the @code{tr} utility.
+* Labels Program:: Printing mailing labels.
+* Word Sorting:: A program to produce a word usage count.
+* History Sorting:: Eliminating duplicate entries from a history
+ file.
+* Extract Program:: Pulling out programs from Texinfo source
+ files.
+* Simple Sed:: A Simple Stream Editor.
+* Igawk Program:: A wrapper for @code{awk} that includes files.
+* V7/SVR3.1:: The major changes between V7 and System V
+ Release 3.1.
+* SVR4:: Minor changes between System V Releases 3.1
+ and 4.
+* POSIX:: New features from the POSIX standard.
+* BTL:: New features from the Bell Laboratories
+ version of @code{awk}.
+* POSIX/GNU:: The extensions in @code{gawk} not in POSIX
+ @code{awk}.
+* Command Line Summary:: Recapitulation of the command line.
+* Language Summary:: A terse review of the language.
+* Variables/Fields:: Variables, fields, and arrays.
+* Fields Summary:: Input field splitting.
+* Built-in Summary:: @code{awk}'s built-in variables.
+* Arrays Summary:: Using arrays.
+* Data Type Summary:: Values in @code{awk} are numbers or strings.
+* Rules Summary:: Patterns and Actions, and their component
+ parts.
+* Pattern Summary:: Quick overview of patterns.
+* Regexp Summary:: Quick overview of regular expressions.
+* Actions Summary:: Quick overview of actions.
+* Operator Summary:: @code{awk} operators.
+* Control Flow Summary:: The control statements.
+* I/O Summary:: The I/O statements.
+* Printf Summary:: A summary of @code{printf}.
+* Special File Summary:: Special file names interpreted internally.
+* Built-in Functions Summary:: Built-in numeric and string functions.
+* Time Functions Summary:: Built-in time functions.
+* String Constants Summary:: Escape sequences in strings.
+* Functions Summary:: Defining and calling functions.
+* Historical Features:: Some undocumented but supported ``features''.
+* Gawk Distribution:: What is in the @code{gawk} distribution.
+* Getting:: How to get the distribution.
+* Extracting:: How to extract the distribution.
+* Distribution contents:: What is in the distribution.
+* Unix Installation:: Installing @code{gawk} under various versions
+ of Unix.
+* Quick Installation:: Compiling @code{gawk} under Unix.
+* Configuration Philosophy:: How it's all supposed to work.
+* VMS Installation:: Installing @code{gawk} on VMS.
+* VMS Compilation:: How to compile @code{gawk} under VMS.
+* VMS Installation Details:: How to install @code{gawk} under VMS.
+* VMS Running:: How to run @code{gawk} under VMS.
+* VMS POSIX:: Alternate instructions for VMS POSIX.
+* PC Installation:: Installing and Compiling @code{gawk} on MS-DOS
+ and OS/2
+* Atari Installation:: Installing @code{gawk} on the Atari ST.
+* Atari Compiling:: Compiling @code{gawk} on Atari
+* Atari Using:: Running @code{gawk} on Atari
+* Amiga Installation:: Installing @code{gawk} on an Amiga.
+* Bugs:: Reporting Problems and Bugs.
+* Other Versions:: Other freely available @code{awk}
+ implementations.
+* Compatibility Mode:: How to disable certain @code{gawk} extensions.
+* Additions:: Making Additions To @code{gawk}.
+* Adding Code:: Adding code to the main body of @code{gawk}.
+* New Ports:: Porting @code{gawk} to a new operating system.
+* Future Extensions:: New features that may be implemented one day.
+* Improvements:: Suggestions for improvements by volunteers.
+
+@end menu
+
+@c dedication for Info file
+@ifinfo
+@center To Miriam, for making me complete.
+@sp 1
+@center To Chana, for the joy you bring us.
+@sp 1
+@center To Rivka, for the exponential increase.
+@sp 1
+@center To Nachum, for the added dimension.
+@end ifinfo
+
+@node Preface, What Is Awk, Top, Top
+@unnumbered Preface
+
+@c I saw a comment somewhere that the preface should describe the book itself,
+@c and the introduction should describe what the book covers.
+
+This @value{DOCUMENT} teaches you about the @code{awk} language and
+how you can use it effectively. You should already be familiar with basic
+system commands, such as @code{cat} and @code{ls},@footnote{These commands
+are available on POSIX compliant systems, as well as on traditional Unix
+based systems. If you are using some other operating system, you still need to
+be familiar with the ideas of I/O redirection and pipes.} and basic shell
+facilities, such as Input/Output (I/O) redirection and pipes.
+
+Implementations of the @code{awk} language are available for many different
+computing environments. This @value{DOCUMENT}, while describing the @code{awk} language
+in general, also describes a particular implementation of @code{awk} called
+@code{gawk} (which stands for ``GNU Awk''). @code{gawk} runs on a broad range
+of Unix systems, ranging from 80386 PC-based computers, up through large scale
+systems, such as Crays. @code{gawk} has also been ported to MS-DOS and
+OS/2 PC's, Atari and Amiga micro-computers, and VMS.
+
+@menu
+* History:: The history of @code{gawk} and @code{awk}.
+* Manual History:: Brief history of the GNU project and this
+ @value{DOCUMENT}.
+* Acknowledgements:: Acknowledgements.
+@end menu
+
+@node History, Manual History, Preface, Preface
+@unnumberedsec History of @code{awk} and @code{gawk}
+
+@cindex acronym
+@cindex history of @code{awk}
+@cindex Aho, Alfred
+@cindex Weinberger, Peter
+@cindex Kernighan, Brian
+@cindex old @code{awk}
+@cindex new @code{awk}
+The name @code{awk} comes from the initials of its designers: Alfred V.@:
+Aho, Peter J.@: Weinberger, and Brian W.@: Kernighan. The original version of
+@code{awk} was written in 1977 at AT&T Bell Laboratories.
+In 1985 a new version made the programming
+language more powerful, introducing user-defined functions, multiple input
+streams, and computed regular expressions.
+This new version became generally available with Unix System V Release 3.1.
+The version in System V Release 4 added some new features and also cleaned
+up the behavior in some of the ``dark corners'' of the language.
+The specification for @code{awk} in the POSIX Command Language
+and Utilities standard further clarified the language based on feedback
+from both the @code{gawk} designers, and the original Bell Labs @code{awk}
+designers.
+
+The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin
+and Jay Fenlason, with advice from Richard Stallman. John Woods
+contributed parts of the code as well. In 1988 and 1989, David Trueman, with
+help from Arnold Robbins, thoroughly reworked @code{gawk} for compatibility
+with the newer @code{awk}. Current development focuses on bug fixes,
+performance improvements, standards compliance, and occasionally, new features.
+
+@node Manual History, Acknowledgements, History, Preface
+@unnumberedsec The GNU Project and This Book
+
+@cindex Free Software Foundation
+@cindex Stallman, Richard
+The Free Software Foundation (FSF) is a non-profit organization dedicated
+to the production and distribution of freely distributable software.
+It was founded by Richard M.@: Stallman, the author of the original
+Emacs editor. GNU Emacs is the most widely used version of Emacs today.
+
+@cindex GNU Project
+The GNU project is an on-going effort on the part of the Free Software
+Foundation to create a complete, freely distributable, POSIX compliant
+computing environment. (GNU stands for ``GNU's not Unix''.)
+The FSF uses the ``GNU General Public License'' (or GPL) to ensure that
+source code for their software is always available to the end user. A
+copy of the GPL is included for your reference
+(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}).
+The GPL applies to the C language source code for @code{gawk}.
+
+A shell, an editor (Emacs), highly portable optimizing C, C++, and
+Objective-C compilers, a symbolic debugger, and dozens of large and
+small utilities (such as @code{gawk}) have all been completed and are
+freely available. As of this writing (early 1997), the GNU operating
+system kernel (the HURD) has been released, but is still in an early
+stage of development.
+
+@cindex Linux
+@cindex NetBSD
+@cindex FreeBSD
+Until the GNU operating system is more fully developed, you should
+consider using Linux, a freely distributable, Unix-like operating
+system for 80386, DEC Alpha, Sun SPARC and other systems. There are
+many books on Linux. One freely available one is @cite{Linux
+Installation and Getting Started}, by Matt Welsh.
+Many Linux distributions are available, often in computer stores or
+bundled on CD-ROM with books about Linux.
+(There are three other freely available, Unix-like operating systems for
+80386 and other systems, NetBSD, FreeBSD, and OpenBSD. All are based on the
+4.4-Lite Berkeley Software Distribution, and they use recent versions
+of @code{gawk} for their versions of @code{awk}.)
+
+@iftex
+This @value{DOCUMENT} you are reading now is actually free. The
+information in it is freely available to anyone, the machine readable
+source code for the @value{DOCUMENT} comes with @code{gawk}, and anyone
+may take this @value{DOCUMENT} to a copying machine and make as many
+copies of it as they like. (Take a moment to check the copying
+permissions on the Copyright page.)
+
+If you paid money for this @value{DOCUMENT}, what you actually paid for
+was the @value{DOCUMENT}'s nice printing and binding, and the
+publisher's associated costs to produce it. We have made an effort to
+keep these costs reasonable; most people would prefer a bound book to
+over 330 pages of photo-copied text that would then have to be held in
+a loose-leaf binder (not to mention the time and labor involved in
+doing the copying). The same is true of producing this
+@value{DOCUMENT} from the machine readable source; the retail price is
+only slightly more than the cost per page of printing it
+on a laser printer.
+@end iftex
+
+This @value{DOCUMENT} itself has gone through several previous,
+preliminary editions. I started working on a preliminary draft of
+@cite{The GAWK Manual}, by Diane Close, Paul Rubin, and Richard
+Stallman, in the fall of 1988.
+It was around 90 pages long, and barely described the original, ``old''
+version of @code{awk}. After substantial revision, the first version of
+@cite{The GAWK Manual} to be released was Edition 0.11 Beta in
+October of 1989. The manual then underwent more substantial revision
+for Edition 0.13 of December 1991.
+David Trueman, Pat Rankin, and Michal Jaegermann contributed sections
+of the manual for Edition 0.13.
+That edition was published by the
+FSF as a bound book early in 1992. Since then there have been several
+minor revisions, notably Edition 0.14 of November 1992 that was published
+by the FSF in January of 1993, and Edition 0.16 of August 1993.
+
+Edition 1.0 of @cite{@value{TITLE}} represents a significant re-working
+of @cite{The GAWK Manual}, with much additional material.
+The FSF and I agree that I am now the primary author.
+I also felt that it needed a more descriptive title.
+
+@cite{@value{TITLE}} will undoubtedly continue to evolve.
+An electronic version
+comes with the @code{gawk} distribution from the FSF.
+If you find an error in this @value{DOCUMENT}, please report it!
+@xref{Bugs, ,Reporting Problems and Bugs}, for information on submitting
+problem reports electronically, or write to me in care of the FSF.
+
+@node Acknowledgements, , Manual History, Preface
+@unnumberedsec Acknowledgements
+
+@cindex Stallman, Richard
+I would like to acknowledge Richard M.@: Stallman, for his vision of a
+better world, and for his courage in founding the FSF and starting the
+GNU project.
+
+The initial draft of @cite{The GAWK Manual} had the following acknowledgements:
+
+@quotation
+Many people need to be thanked for their assistance in producing this
+manual. Jay Fenlason contributed many ideas and sample programs. Richard
+Mlynarik and Robert Chassell gave helpful comments on drafts of this
+manual. The paper @cite{A Supplemental Document for @code{awk}} by John W.@:
+Pierce of the Chemistry Department at UC San Diego, pinpointed several
+issues relevant both to @code{awk} implementation and to this manual, that
+would otherwise have escaped us.
+@end quotation
+
+The following people provided many helpful comments on Edition 0.13 of
+@cite{The GAWK Manual}: Rick Adams, Michael Brennan, Rich Burridge, Diane Close,
+Christopher (``Topher'') Eliot, Michael Lijewski, Pat Rankin, Miriam Robbins,
+and Michal Jaegermann.
+
+The following people provided many helpful comments for Edition 1.0 of
+@cite{@value{TITLE}}: Karl Berry, Michael Brennan, Darrel
+Hankerson, Michal Jaegermann, Michael Lijewski, and Miriam Robbins.
+Pat Rankin, Michal Jaegermann, Darrel Hankerson and Scott Deifik
+updated their respective sections for Edition 1.0.
+
+Robert J.@: Chassell provided much valuable advice on
+the use of Texinfo. He also deserves special thanks for
+convincing me @emph{not} to title this @value{DOCUMENT}
+@cite{How To Gawk Politely}.
+Karl Berry helped significantly with the @TeX{} part of Texinfo.
+
+@cindex Trueman, David
+David Trueman deserves special credit; he has done a yeoman job
+of evolving @code{gawk} so that it performs well, and without bugs.
+Although he is no longer involved with @code{gawk},
+working with him on this project was a significant pleasure.
+
+@cindex Deifik, Scott
+@cindex Hankerson, Darrel
+@cindex Rommel, Kai Uwe
+@cindex Rankin, Pat
+@cindex Jaegermann, Michal
+Scott Deifik, Darrel Hankerson, Kai Uwe Rommel, Pat Rankin, and Michal
+Jaegermann (in no particular order) are long time members of the
+@code{gawk} ``crack portability team.'' Without their hard work and
+help, @code{gawk} would not be nearly the fine program it is today. It
+has been and continues to be a pleasure working with this team of fine
+people.
+
+@cindex Friedl, Jeffrey
+Jeffrey Friedl provided invaluable help in tracking down a number
+of last minute problems with regular expressions in @code{gawk} 3.0.
+
+@cindex Kernighan, Brian
+David and I would like to thank Brian Kernighan of Bell Labs for
+invaluable assistance during the testing and debugging of @code{gawk}, and for
+help in clarifying numerous points about the language. We could not have
+done nearly as good a job on either @code{gawk} or its documentation without
+his help.
+
+@cindex Hughes, Phil
+I would like to thank Marshall and Elaine Hartholz of Seattle, and Dr.@:
+Bert and Rita Schreiber of Detroit for large amounts of quiet vacation
+time in their homes, which allowed me to make significant progress on
+this @value{DOCUMENT} and on @code{gawk} itself. Phil Hughes of SSC
+contributed in a very important way by loaning me his laptop Linux
+system, not once, but twice, allowing me to do a lot of work while
+away from home.
+
+@cindex Robbins, Miriam
+Finally, I must thank my wonderful wife, Miriam, for her patience through
+the many versions of this project, for her proof-reading,
+and for sharing me with the computer.
+I would like to thank my parents for their love, and for the grace with
+which they raised and educated me.
+I also must acknowledge my gratitude to G-d, for the many opportunities
+He has sent my way, as well as for the gifts He has given me with which to
+take advantage of those opportunities.
+@sp 2
+@noindent
+Arnold Robbins @*
+Atlanta, Georgia @*
+February, 1997
+
+@ignore
+Stuff still not covered anywhere:
+BASICS:
+ Integer vs. floating point
+ Hex vs. octal vs. decimal
+ Interpreter vs compiler
+ input/output
+@end ignore
+
+@node What Is Awk, Getting Started, Preface, Top
+@chapter Introduction
+
+If you are like many computer users, you would frequently like to make
+changes in various text files wherever certain patterns appear, or
+extract data from parts of certain lines while discarding the rest. To
+write a program to do this in a language such as C or Pascal is a
+time-consuming inconvenience that may take many lines of code. The job
+may be easier with @code{awk}.
+
+The @code{awk} utility interprets a special-purpose programming language
+that makes it possible to handle simple data-reformatting jobs
+with just a few lines of code.
+
+The GNU implementation of @code{awk} is called @code{gawk}; it is fully
+upward compatible with the System V Release 4 version of
+@code{awk}. @code{gawk} is also upward compatible with the POSIX
+specification of the @code{awk} language. This means that all
+properly written @code{awk} programs should work with @code{gawk}.
+Thus, we usually don't distinguish between @code{gawk} and other @code{awk}
+implementations.
+
+@cindex uses of @code{awk}
+Using @code{awk} you can:
+
+@itemize @bullet
+@item
+manage small, personal databases
+
+@item
+generate reports
+
+@item
+validate data
+
+@item
+produce indexes, and perform other document preparation tasks
+
+@item
+even experiment with algorithms that can be adapted later to other computer
+languages
+@end itemize
+
+@menu
+* This Manual:: Using this @value{DOCUMENT}. Includes sample
+ input files that you can use.
+* Conventions:: Typographical Conventions.
+* Sample Data Files:: Sample data files for use in the @code{awk}
+ programs illustrated in this @value{DOCUMENT}.
+@end menu
+
+@node This Manual, Conventions, What Is Awk, What Is Awk
+@section Using This Book
+@cindex book, using this
+@cindex using this book
+@cindex language, @code{awk}
+@cindex program, @code{awk}
+@ignore
+@cindex @code{awk} language
+@cindex @code{awk} program
+@end ignore
+
+The term @code{awk} refers to a particular program, and to the language you
+use to tell this program what to do. When we need to be careful, we call
+the program ``the @code{awk} utility'' and the language ``the @code{awk}
+language.'' The term @code{gawk} refers to a version of @code{awk} developed
+as part of the GNU project. The purpose of this @value{DOCUMENT} is to explain
+both the @code{awk} language and how to run the @code{awk} utility.
+
+The main purpose of the @value{DOCUMENT} is to explain the features
+of @code{awk}, as defined in the POSIX standard. It does so in the context
+of one particular implementation, @code{gawk}. While doing so, it will also
+attempt to describe important differences between @code{gawk} and other
+@code{awk} implementations. Finally, any @code{gawk} features that
+are not in the POSIX standard for @code{awk} will be noted.
+
+@iftex
+This @value{DOCUMENT} has the difficult task of being both tutorial and reference.
+If you are a novice, feel free to skip over details that seem too complex.
+You should also ignore the many cross references; they are for the
+expert user, and for the on-line Info version of the document.
+@end iftex
+
+The term @dfn{@code{awk} program} refers to a program written by you in
+the @code{awk} programming language.
+
+@xref{Getting Started, ,Getting Started with @code{awk}}, for the bare
+essentials you need to know to start using @code{awk}.
+
+Some useful ``one-liners'' are included to give you a feel for the
+@code{awk} language (@pxref{One-liners, ,Useful One Line Programs}).
+
+Many sample @code{awk} programs have been provided for you
+(@pxref{Library Functions, ,A Library of @code{awk} Functions}; also
+@pxref{Sample Programs, ,Practical @code{awk} Programs}).
+
+The entire @code{awk} language is summarized for quick reference in
+@ref{Gawk Summary, ,@code{gawk} Summary}. Look there if you just need
+to refresh your memory about a particular feature.
+
+If you find terms that you aren't familiar with, try looking them
+up in the glossary (@pxref{Glossary}).
+
+Most of the time complete @code{awk} programs are used as examples, but in
+some of the more advanced sections, only the part of the @code{awk} program
+that illustrates the concept being described is shown.
+
+While this @value{DOCUMENT} is aimed principally at people who have not been
+exposed
+to @code{awk}, there is a lot of information here that even the @code{awk}
+expert should find useful. In particular, the description of POSIX
+@code{awk}, and the example programs in
+@ref{Library Functions, ,A Library of @code{awk} Functions}, and
+@ref{Sample Programs, ,Practical @code{awk} Programs},
+should be of interest.
+
+@c fakenode --- for prepinfo
+@unnumberedsubsec Dark Corners
+@display
+@i{Who opened that window shade?!?}
+Count Dracula
+@end display
+@sp 1
+
+@cindex d.c., see ``dark corner''
+@cindex dark corner
+Until the POSIX standard (and @cite{The GAWK Manual}),
+many features of @code{awk} were either poorly documented, or not
+documented at all. Descriptions of such features
+(often called ``dark corners'') are noted in this @value{DOCUMENT} with
+``(d.c.)''.
+They also appear in the index under the heading ``dark corner.''
+
+@node Conventions, Sample Data Files, This Manual, What Is Awk
+@section Typographical Conventions
+
+This @value{DOCUMENT} is written using Texinfo, the GNU documentation formatting language.
+A single Texinfo source file is used to produce both the printed and on-line
+versions of the documentation.
+@iftex
+Because of this, the typographical conventions
+are slightly different than in other books you may have read.
+@end iftex
+@ifinfo
+This section briefly documents the typographical conventions used in Texinfo.
+@end ifinfo
+
+Examples you would type at the command line are preceded by the common
+shell primary and secondary prompts, @samp{$} and @samp{>}.
+Output from the command is preceded by the glyph ``@print{}''.
+This typically represents the command's standard output.
+Error messages, and other output on the command's standard error, are preceded
+by the glyph ``@error{}''. For example:
+
+@example
+@group
+$ echo hi on stdout
+@print{} hi on stdout
+$ echo hello on stderr 1>&2
+@error{} hello on stderr
+@end group
+@end example
+
+@iftex
+In the text, command names appear in @code{this font}, while code segments
+appear in the same font and quoted, @samp{like this}. Some things will
+be emphasized @emph{like this}, and if a point needs to be made
+strongly, it will be done @strong{like this}. The first occurrence of
+a new term is usually its @dfn{definition}, and appears in the same
+font as the previous occurrence of ``definition'' in this sentence.
+File names are indicated like this: @file{/path/to/ourfile}.
+@end iftex
+
+Characters that you type at the keyboard look @kbd{like this}. In particular,
+there are special characters called ``control characters.'' These are
+characters that you type by holding down both the @kbd{CONTROL} key and
+another key, at the same time. For example, a @kbd{Control-d} is typed
+by first pressing and holding the @kbd{CONTROL} key, next
+pressing the @kbd{d} key, and finally releasing both keys.
+
+@node Sample Data Files, , Conventions, What Is Awk
+@section Data Files for the Examples
+
+@cindex input file, sample
+@cindex sample input file
+@cindex @file{BBS-list} file
+Many of the examples in this @value{DOCUMENT} take their input from two sample
+data files. The first, called @file{BBS-list}, represents a list of
+computer bulletin board systems together with information about those systems.
+The second data file, called @file{inventory-shipped}, contains
+information about shipments on a monthly basis. In both files,
+each line is considered to be one @dfn{record}.
+
+In the file @file{BBS-list}, each record contains the name of a computer
+bulletin board, its phone number, the board's baud rate(s), and a code for
+the number of hours it is operational. An @samp{A} in the last column
+means the board operates 24 hours a day. A @samp{B} in the last
+column means the board operates evening and weekend hours, only. A
+@samp{C} means the board operates only on weekends.
+
+@c 2e: Update the baud rates to reflect today's faster modems
+@example
+@c system mkdir eg
+@c system mkdir eg/lib
+@c system mkdir eg/data
+@c system mkdir eg/prog
+@c system mkdir eg/misc
+@c file eg/data/BBS-list
+aardvark 555-5553 1200/300 B
+alpo-net 555-3412 2400/1200/300 A
+barfly 555-7685 1200/300 A
+bites 555-1675 2400/1200/300 A
+camelot 555-0542 300 C
+core 555-2912 1200/300 C
+fooey 555-1234 2400/1200/300 B
+foot 555-6699 1200/300 B
+macfoo 555-6480 1200/300 A
+sdace 555-3430 2400/1200/300 A
+sabafoo 555-2127 1200/300 C
+@c endfile
+@end example
+
+@cindex @file{inventory-shipped} file
+The second data file, called @file{inventory-shipped}, represents
+information about shipments during the year.
+Each record contains the month of the year, the number
+of green crates shipped, the number of red boxes shipped, the number of
+orange bags shipped, and the number of blue packages shipped,
+respectively. There are 16 entries, covering the 12 months of one year
+and four months of the next year.
+
+@example
+@c file eg/data/inventory-shipped
+Jan 13 25 15 115
+Feb 15 32 24 226
+Mar 15 24 34 228
+Apr 31 52 63 420
+May 16 34 29 208
+Jun 31 42 75 492
+Jul 24 34 67 436
+Aug 15 34 47 316
+Sep 13 55 37 277
+Oct 29 54 68 525
+Nov 20 87 82 577
+Dec 17 35 61 401
+
+Jan 21 36 64 620
+Feb 26 58 80 652
+Mar 24 75 70 495
+Apr 21 70 74 514
+@c endfile
+@end example
+
+@ifinfo
+If you are reading this in GNU Emacs using Info, you can copy the regions
+of text showing these sample files into your own test files. This way you
+can try out the examples shown in the remainder of this document. You do
+this by using the command @kbd{M-x write-region} to copy text from the Info
+file into a file for use with @code{awk}
+(@pxref{Misc File Ops, , Miscellaneous File Operations, emacs, GNU Emacs Manual},
+for more information). Using this information, create your own
+@file{BBS-list} and @file{inventory-shipped} files, and practice what you
+learn in this @value{DOCUMENT}.
+
+If you are using the stand-alone version of Info,
+see @ref{Extract Program, ,Extracting Programs from Texinfo Source Files},
+for an @code{awk} program that will extract these data files from
+@file{gawk.texi}, the Texinfo source file for this Info file.
+@end ifinfo
+
+@node Getting Started, One-liners, What Is Awk, Top
+@chapter Getting Started with @code{awk}
+@cindex script, definition of
+@cindex rule, definition of
+@cindex program, definition of
+@cindex basic function of @code{awk}
+
+The basic function of @code{awk} is to search files for lines (or other
+units of text) that contain certain patterns. When a line matches one
+of the patterns, @code{awk} performs specified actions on that line.
+@code{awk} keeps processing input lines in this way until the end of the
+input files is reached.
+
+@cindex data-driven languages
+@cindex procedural languages
+@cindex language, data-driven
+@cindex language, procedural
+Programs in @code{awk} are different from programs in most other languages,
+because @code{awk} programs are @dfn{data-driven}; that is, you describe
+the data you wish to work with, and then what to do when you find it.
+Most other languages are @dfn{procedural}; you have to describe, in great
+detail, every step the program is to take. When working with procedural
+languages, it is usually much
+harder to clearly describe the data your program will process.
+For this reason, @code{awk} programs are often refreshingly easy to both
+write and read.
+
+@cindex program, definition of
+@cindex rule, definition of
+When you run @code{awk}, you specify an @code{awk} @dfn{program} that
+tells @code{awk} what to do. The program consists of a series of
+@dfn{rules}. (It may also contain @dfn{function definitions},
+an advanced feature which we will ignore for now.
+@xref{User-defined, ,User-defined Functions}.) Each rule specifies one
+pattern to search for, and one action to perform when that pattern is found.
+
+Syntactically, a rule consists of a pattern followed by an action. The
+action is enclosed in curly braces to separate it from the pattern.
+Rules are usually separated by newlines. Therefore, an @code{awk}
+program looks like this:
+
+@example
+@var{pattern} @{ @var{action} @}
+@var{pattern} @{ @var{action} @}
+@dots{}
+@end example
+
+@menu
+* Names:: What name to use to find @code{awk}.
+* Running gawk:: How to run @code{gawk} programs; includes
+ command line syntax.
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example with two rules.
+* More Complex:: A more complex example.
+* Statements/Lines:: Subdividing or combining statements into
+ lines.
+* Other Features:: Other Features of @code{awk}.
+* When:: When to use @code{gawk} and when to use other
+ things.
+@end menu
+
+@node Names, Running gawk , Getting Started, Getting Started
+@section A Rose By Any Other Name
+
+@cindex old @code{awk} vs. new @code{awk}
+@cindex new @code{awk} vs. old @code{awk}
+The @code{awk} language has evolved over the years. Full details are
+provided in @ref{Language History, ,The Evolution of the @code{awk} Language}.
+The language described in this @value{DOCUMENT}
+is often referred to as ``new @code{awk}.''
+
+Because of this, many systems have multiple
+versions of @code{awk}.
+Some systems have an @code{awk} utility that implements the
+original version of the @code{awk} language, and a @code{nawk} utility
+for the new version. Others have an @code{oawk} for the ``old @code{awk}''
+language, and plain @code{awk} for the new one. Still others only
+have one version, usually the new one.@footnote{Often, these systems
+use @code{gawk} for their @code{awk} implementation!}
+
+All in all, this makes it difficult for you to know which version of
+@code{awk} you should run when writing your programs. The best advice
+we can give here is to check your local documentation. Look for @code{awk},
+@code{oawk}, and @code{nawk}, as well as for @code{gawk}. Chances are, you
+will have some version of new @code{awk} on your system, and that is what
+you should use when running your programs. (Of course, if you're reading
+this @value{DOCUMENT}, chances are good that you have @code{gawk}!)
+
+Throughout this @value{DOCUMENT}, whenever we refer to a language feature
+that should be available in any complete implementation of POSIX @code{awk},
+we simply use the term @code{awk}. When referring to a feature that is
+specific to the GNU implementation, we use the term @code{gawk}.
+
+@node Running gawk, Very Simple, Names, Getting Started
+@section How to Run @code{awk} Programs
+
+@cindex command line formats
+@cindex running @code{awk} programs
+There are several ways to run an @code{awk} program. If the program is
+short, it is easiest to include it in the command that runs @code{awk},
+like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of patterns and actions, as
+described earlier.
+(The reason for the single quotes is described below, in
+@ref{One-shot, ,One-shot Throw-away @code{awk} Programs}.)
+
+When the program is long, it is usually more convenient to put it in a file
+and run it with a command like this:
+
+@example
+awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@menu
+* One-shot:: Running a short throw-away @code{awk} program.
+* Read Terminal:: Using no input files (input from terminal
+ instead).
+* Long:: Putting permanent @code{awk} programs in
+ files.
+* Executable Scripts:: Making self-contained @code{awk} programs.
+* Comments:: Adding documentation to @code{gawk} programs.
+@end menu
+
+@node One-shot, Read Terminal, Running gawk, Running gawk
+@subsection One-shot Throw-away @code{awk} Programs
+
+Once you are familiar with @code{awk}, you will often type in simple
+programs the moment you want to use them. Then you can write the
+program as the first argument of the @code{awk} command, like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of @var{patterns} and
+@var{actions}, as described earlier.
+
+@cindex single quotes, why needed
+This command format instructs the @dfn{shell}, or command interpreter,
+to start @code{awk} and use the @var{program} to process records in the
+input file(s). There are single quotes around @var{program} so that
+the shell doesn't interpret any @code{awk} characters as special shell
+characters. They also cause the shell to treat all of @var{program} as
+a single argument for @code{awk} and allow @var{program} to be more
+than one line long.
+
+This format is also useful for running short or medium-sized @code{awk}
+programs from shell scripts, because it avoids the need for a separate
+file for the @code{awk} program. A self-contained shell script is more
+reliable since there are no other files to misplace.
+
+@ref{One-liners, , Useful One Line Programs}, presents several short,
+self-contained programs.
+
+As an interesting side point, the command
+
+@example
+awk '/foo/' @var{files} @dots{}
+@end example
+
+@noindent
+is essentially the same as
+
+@cindex @code{egrep}
+@example
+egrep foo @var{files} @dots{}
+@end example
+
+@node Read Terminal, Long, One-shot, Running gawk
+@subsection Running @code{awk} without Input Files
+
+@cindex standard input
+@cindex input, standard
+You can also run @code{awk} without any input files. If you type the
+command line:
+
+@example
+awk '@var{program}'
+@end example
+
+@noindent
+then @code{awk} applies the @var{program} to the @dfn{standard input},
+which usually means whatever you type on the terminal. This continues
+until you indicate end-of-file by typing @kbd{Control-d}.
+(On other operating systems, the end-of-file character may be different.
+For example, on OS/2 and MS-DOS, it is @kbd{Control-z}.)
+
+For example, the following program prints a friendly piece of advice
+(from Douglas Adams' @cite{The Hitchhiker's Guide to the Galaxy}),
+to keep you from worrying about the complexities of computer programming
+(@samp{BEGIN} is a feature we haven't discussed yet).
+
+@example
+$ awk "BEGIN @{ print \"Don't Panic!\" @}"
+@print{} Don't Panic!
+@end example
+
+@cindex quoting, shell
+@cindex shell quoting
+This program does not read any input. The @samp{\} before each of the
+inner double quotes is necessary because of the shell's quoting rules,
+in particular because it mixes both single quotes and double quotes.
+
+This next simple @code{awk} program
+emulates the @code{cat} utility; it copies whatever you type at the
+keyboard to its standard output. (Why this works is explained shortly.)
+
+@example
+$ awk '@{ print @}'
+Now is the time for all good men
+@print{} Now is the time for all good men
+to come to the aid of their country.
+@print{} to come to the aid of their country.
+Four score and seven years ago, ...
+@print{} Four score and seven years ago, ...
+What, me worry?
+@print{} What, me worry?
+@kbd{Control-d}
+@end example
+
+@node Long, Executable Scripts, Read Terminal, Running gawk
+@subsection Running Long Programs
+
+@cindex running long programs
+@cindex @code{-f} option
+@cindex program file
+@cindex file, @code{awk} program
+Sometimes your @code{awk} programs can be very long. In this case it is
+more convenient to put the program into a separate file. To tell
+@code{awk} to use that file for its program, you type:
+
+@example
+awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+The @samp{-f} instructs the @code{awk} utility to get the @code{awk} program
+from the file @var{source-file}. Any file name can be used for
+@var{source-file}. For example, you could put the program:
+
+@example
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+@noindent
+into the file @file{advice}. Then this command:
+
+@example
+awk -f advice
+@end example
+
+@noindent
+does the same thing as this one:
+
+@example
+awk "BEGIN @{ print \"Don't Panic!\" @}"
+@end example
+
+@cindex quoting, shell
+@cindex shell quoting
+@noindent
+which was explained earlier (@pxref{Read Terminal, ,Running @code{awk} without Input Files}).
+Note that you don't usually need single quotes around the file name that you
+specify with @samp{-f}, because most file names don't contain any of the shell's
+special characters. Notice that in @file{advice}, the @code{awk}
+program did not have single quotes around it. The quotes are only needed
+for programs that are provided on the @code{awk} command line.
+
+If you want to identify your @code{awk} program files clearly as such,
+you can add the extension @file{.awk} to the file name. This doesn't
+affect the execution of the @code{awk} program, but it does make
+``housekeeping'' easier.
+
+@node Executable Scripts, Comments, Long, Running gawk
+@subsection Executable @code{awk} Programs
+@cindex executable scripts
+@cindex scripts, executable
+@cindex self contained programs
+@cindex program, self contained
+@cindex @code{#!} (executable scripts)
+
+Once you have learned @code{awk}, you may want to write self-contained
+@code{awk} scripts, using the @samp{#!} script mechanism. You can do
+this on many Unix systems@footnote{The @samp{#!} mechanism works on
+Linux systems,
+Unix systems derived from Berkeley Unix, System V Release 4, and some System
+V Release 3 systems.} (and someday on the GNU system).
+
+For example, you could update the file @file{advice} to look like this:
+
+@example
+#! /bin/awk -f
+
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+@noindent
+After making this file executable (with the @code{chmod} utility), you
+can simply type @samp{advice}
+at the shell, and the system will arrange to run @code{awk}@footnote{The
+line beginning with @samp{#!} lists the full file name of an interpreter
+to be run, and an optional initial command line argument to pass to that
+interpreter. The operating system then runs the interpreter with the given
+argument and the full argument list of the executed program. The first argument
+in the list is the full file name of the @code{awk} program. The rest of the
+argument list will either be options to @code{awk}, or data files,
+or both.} as if you had typed @samp{awk -f advice}.
+
+@example
+@group
+$ advice
+@print{} Don't Panic!
+@end group
+@end example
+
+@noindent
+Self-contained @code{awk} scripts are useful when you want to write a
+program which users can invoke without their having to know that the program is
+written in @code{awk}.
+
+@cindex shell scripts
+@cindex scripts, shell
+Some older systems do not support the @samp{#!} mechanism. You can get a
+similar effect using a regular shell script. It would look something
+like this:
+
+@example
+: The colon ensures execution by the standard shell.
+awk '@var{program}' "$@@"
+@end example
+
+Using this technique, it is @emph{vital} to enclose the @var{program} in
+single quotes to protect it from interpretation by the shell. If you
+omit the quotes, only a shell wizard can predict the results.
+
+The @code{"$@@"} causes the shell to forward all the command line
+arguments to the @code{awk} program, without interpretation. The first
+line, which starts with a colon, is used so that this shell script will
+work even if invoked by a user who uses the C shell. (Not all older systems
+obey this convention, but many do.)
+@c 2e:
+@c Someday: (See @cite{The Bourne Again Shell}, by ??.)
+
+@node Comments, , Executable Scripts, Running gawk
+@subsection Comments in @code{awk} Programs
+@cindex @code{#} (comment)
+@cindex comments
+@cindex use of comments
+@cindex documenting @code{awk} programs
+@cindex programs, documenting
+
+A @dfn{comment} is some text that is included in a program for the sake
+of human readers; it is not really part of the program. Comments
+can explain what the program does, and how it works. Nearly all
+programming languages have provisions for comments, because programs are
+typically hard to understand without their extra help.
+
+In the @code{awk} language, a comment starts with the sharp sign
+character, @samp{#}, and continues to the end of the line.
+The @samp{#} does not have to be the first character on the line. The
+@code{awk} language ignores the rest of a line following a sharp sign.
+For example, we could have put the following into @file{advice}:
+
+@example
+# This program prints a nice friendly message. It helps
+# keep novice users from being afraid of the computer.
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+You can put comment lines into keyboard-composed throw-away @code{awk}
+programs also, but this usually isn't very useful; the purpose of a
+comment is to help you or another person understand the program at
+a later time.
+
+@node Very Simple, Two Rules, Running gawk, Getting Started
+@section A Very Simple Example
+
+The following command runs a simple @code{awk} program that searches the
+input file @file{BBS-list} for the string of characters: @samp{foo}. (A
+string of characters is usually called a @dfn{string}.
+The term @dfn{string} is perhaps based on similar usage in English, such
+as ``a string of pearls,'' or, ``a string of cars in a train.'')
+
+@example
+awk '/foo/ @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+When lines containing @samp{foo} are found, they are printed, because
+@w{@samp{print $0}} means print the current line. (Just @samp{print} by
+itself means the same thing, so we could have written that
+instead.)
+
+You will notice that slashes, @samp{/}, surround the string @samp{foo}
+in the @code{awk} program. The slashes indicate that @samp{foo}
+is a pattern to search for. This type of pattern is called a
+@dfn{regular expression}, and is covered in more detail later
+(@pxref{Regexp, ,Regular Expressions}).
+The pattern is allowed to match parts of words.
+There are
+single-quotes around the @code{awk} program so that the shell won't
+interpret any of it as special shell characters.
+
+Here is what this program prints:
+
+@example
+@group
+$ awk '/foo/ @{ print $0 @}' BBS-list
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+@cindex action, default
+@cindex pattern, default
+@cindex default action
+@cindex default pattern
+In an @code{awk} rule, either the pattern or the action can be omitted,
+but not both. If the pattern is omitted, then the action is performed
+for @emph{every} input line. If the action is omitted, the default
+action is to print all lines that match the pattern.
+
+@cindex empty action
+@cindex action, empty
+Thus, we could leave out the action (the @code{print} statement and the curly
+braces) in the above example, and the result would be the same: all
+lines matching the pattern @samp{foo} would be printed. By comparison,
+omitting the @code{print} statement but retaining the curly braces makes an
+empty action that does nothing; then no lines would be printed.
+
+@node Two Rules, More Complex, Very Simple, Getting Started
+@section An Example with Two Rules
+@cindex how @code{awk} works
+
+The @code{awk} utility reads the input files one line at a
+time. For each line, @code{awk} tries the patterns of each of the rules.
+If several patterns match, then several actions are run, in the order in
+which they appear in the @code{awk} program. If no patterns match, then
+no actions are run.
+
+After processing all the rules (perhaps none) that match the line,
+@code{awk} reads the next line (however,
+@pxref{Next Statement, ,The @code{next} Statement},
+and also @pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+This continues until the end of the file is reached.
+
+For example, the @code{awk} program:
+
+@example
+/12/ @{ print $0 @}
+/21/ @{ print $0 @}
+@end example
+
+@noindent
+contains two rules. The first rule has the string @samp{12} as the
+pattern and @samp{print $0} as the action. The second rule has the
+string @samp{21} as the pattern and also has @samp{print $0} as the
+action. Each rule's action is enclosed in its own pair of braces.
+
+This @code{awk} program prints every line that contains the string
+@samp{12} @emph{or} the string @samp{21}. If a line contains both
+strings, it is printed twice, once by each rule.
+
+This is what happens if we run this program on our two sample data files,
+@file{BBS-list} and @file{inventory-shipped}, as shown here:
+
+@example
+$ awk '/12/ @{ print $0 @}
+> /21/ @{ print $0 @}' BBS-list inventory-shipped
+@print{} aardvark 555-5553 1200/300 B
+@print{} alpo-net 555-3412 2400/1200/300 A
+@print{} barfly 555-7685 1200/300 A
+@print{} bites 555-1675 2400/1200/300 A
+@print{} core 555-2912 1200/300 C
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sdace 555-3430 2400/1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@print{} sabafoo 555-2127 1200/300 C
+@print{} Jan 21 36 64 620
+@print{} Apr 21 70 74 514
+@end example
+
+@noindent
+Note how the line in @file{BBS-list} beginning with @samp{sabafoo}
+was printed twice, once for each rule.
+
+@node More Complex, Statements/Lines, Two Rules, Getting Started
+@section A More Complex Example
+
+@ignore
+We have to use ls -lg here to get portable output across Unix systems.
+The POSIX ls matches this behavior too. Sigh.
+@end ignore
+Here is an example to give you an idea of what typical @code{awk}
+programs do. This example shows how @code{awk} can be used to
+summarize, select, and rearrange the output of another utility. It uses
+features that haven't been covered yet, so don't worry if you don't
+understand all the details.
+
+@example
+ls -lg | awk '$6 == "Nov" @{ sum += $5 @}
+ END @{ print sum @}'
+@end example
+
+@cindex @code{csh}, backslash continuation
+@cindex backslash continuation in @code{csh}
+This command prints the total number of bytes in all the files in the
+current directory that were last modified in November (of any year).
+(In the C shell you would need to type a semicolon and then a backslash
+at the end of the first line; in a POSIX-compliant shell, such as the
+Bourne shell or Bash, the GNU Bourne-Again shell, you can type the example
+as shown.)
+@ignore
+FIXME: how can users tell what shell they are running? Need a footnote
+or something, but getting into this is a distraction.
+@end ignore
+
+The @w{@samp{ls -lg}} part of this example is a system command that gives
+you a listing of the files in a directory, including file size and the date
+the file was last modified. Its output looks like this:
+
+@example
+-rw-r--r-- 1 arnold user 1933 Nov 7 13:05 Makefile
+-rw-r--r-- 1 arnold user 10809 Nov 7 13:03 gawk.h
+-rw-r--r-- 1 arnold user 983 Apr 13 12:14 gawk.tab.h
+-rw-r--r-- 1 arnold user 31869 Jun 15 12:20 gawk.y
+-rw-r--r-- 1 arnold user 22414 Nov 7 13:03 gawk1.c
+-rw-r--r-- 1 arnold user 37455 Nov 7 13:03 gawk2.c
+-rw-r--r-- 1 arnold user 27511 Dec 9 13:07 gawk3.c
+-rw-r--r-- 1 arnold user 7989 Nov 7 13:03 gawk4.c
+@end example
+
+@noindent
+The first field contains read-write permissions, the second field contains
+the number of links to the file, and the third field identifies the owner of
+the file. The fourth field identifies the group of the file.
+The fifth field contains the size of the file in bytes. The
+sixth, seventh and eighth fields contain the month, day, and time,
+respectively, that the file was last modified. Finally, the ninth field
+contains the name of the file.
+
+@cindex automatic initialization
+@cindex initialization, automatic
+The @samp{$6 == "Nov"} in our @code{awk} program is an expression that
+tests whether the sixth field of the output from @w{@samp{ls -lg}}
+matches the string @samp{Nov}. Each time a line has the string
+@samp{Nov} for its sixth field, the action @samp{sum += $5} is
+performed. This adds the fifth field (the file size) to the variable
+@code{sum}. As a result, when @code{awk} has finished reading all the
+input lines, @code{sum} is the sum of the sizes of files whose
+lines matched the pattern. (This works because @code{awk} variables
+are automatically initialized to zero.)
+
+After the last line of output from @code{ls} has been processed, the
+@code{END} rule is executed, and the value of @code{sum} is
+printed. In this example, the value of @code{sum} would be 80600.
+
+These more advanced @code{awk} techniques are covered in later sections
+(@pxref{Action Overview, ,Overview of Actions}). Before you can move on to more
+advanced @code{awk} programming, you have to know how @code{awk} interprets
+your input and displays your output. By manipulating fields and using
+@code{print} statements, you can produce some very useful and impressive
+looking reports.
+
+@node Statements/Lines, Other Features, More Complex, Getting Started
+@section @code{awk} Statements Versus Lines
+@cindex line break
+@cindex newline
+
+Most often, each line in an @code{awk} program is a separate statement or
+separate rule, like this:
+
+@example
+awk '/12/ @{ print $0 @}
+ /21/ @{ print $0 @}' BBS-list inventory-shipped
+@end example
+
+However, @code{gawk} will ignore newlines after any of the following:
+
+@example
+, @{ ? : || && do else
+@end example
+
+@noindent
+A newline at any other point is considered the end of the statement.
+(Splitting lines after @samp{?} and @samp{:} is a minor @code{gawk}
+extension. The @samp{?} and @samp{:} referred to here are the two
+operators of the three-operand conditional expression described in
+@ref{Conditional Exp, ,Conditional Expressions}.)
+
+@cindex backslash continuation
+@cindex continuation of lines
+@cindex line continuation
+If you would like to split a single statement into two lines at a point
+where a newline would terminate it, you can @dfn{continue} it by ending the
+first line with a backslash character, @samp{\}. The backslash must be
+the final character on the line to be recognized as a continuation
+character. This is allowed absolutely anywhere in the statement, even
+in the middle of a string or regular expression. For example:
+
+@example
+awk '/This regular expression is too long, so continue it\
+ on the next line/ @{ print $1 @}'
+@end example
+
+@noindent
+@cindex portability issues
+We have generally not used backslash continuation in the sample programs
+in this @value{DOCUMENT}. Since in @code{gawk} there is no limit on the
+length of a line, it is never strictly necessary; it just makes programs
+more readable. For this same reason, as well as for clarity, we have
+kept most statements short in the sample programs presented throughout
+the @value{DOCUMENT}. Backslash continuation is most useful when your
+@code{awk} program is in a separate source file, instead of typed in on
+the command line. You should also note that many @code{awk}
+implementations are more particular about where you may use backslash
+continuation. For example, they may not allow you to split a string
+constant using backslash continuation. Thus, for maximal portability of
+your @code{awk} programs, it is best not to split your lines in the
+middle of a regular expression or a string.
+
+@cindex @code{csh}, backslash continuation
+@cindex backslash continuation in @code{csh}
+@strong{Caution: backslash continuation does not work as described above
+with the C shell.} Continuation with backslash works for @code{awk}
+programs in files, and also for one-shot programs @emph{provided} you
+are using a POSIX-compliant shell, such as the Bourne shell or Bash, the
+GNU Bourne-Again shell. But the C shell (@code{csh}) behaves
+differently! There, you must use two backslashes in a row, followed by
+a newline. Note also that when using the C shell, @emph{every} newline
+in your @code{awk} program must be escaped with a backslash. To illustrate:
+
+@example
+% awk 'BEGIN @{ \
+? print \\
+? "hello, world" \
+? @}'
+@print{} hello, world
+@end example
+
+@noindent
+Here, the @samp{%} and @samp{?} are the C shell's primary and secondary
+prompts, analogous to the standard shell's @samp{$} and @samp{>}.
+
+@code{awk} is a line-oriented language. Each rule's action has to
+begin on the same line as the pattern. To have the pattern and action
+on separate lines, you @emph{must} use backslash continuation---there
+is no other way.
+
+@cindex backslash continuation and comments
+@cindex comments and backslash continuation
+Note that backslash continuation and comments do not mix. As soon
+as @code{awk} sees the @samp{#} that starts a comment, it ignores
+@emph{everything} on the rest of the line. For example:
+
+@example
+@group
+$ gawk 'BEGIN @{ print "dont panic" # a friendly \
+> BEGIN rule
+> @}'
+@error{} gawk: cmd. line:2: BEGIN rule
+@error{} gawk: cmd. line:2: ^ parse error
+@end group
+@end example
+
+@noindent
+Here, it looks like the backslash would continue the comment onto the
+next line. However, the backslash-newline combination is never even
+noticed, since it is ``hidden'' inside the comment. Thus, the
+@samp{BEGIN} is noted as a syntax error.
+
+@cindex multiple statements on one line
+When @code{awk} statements within one rule are short, you might want to put
+more than one of them on a line. You do this by separating the statements
+with a semicolon, @samp{;}.
+
+This also applies to the rules themselves.
+Thus, the previous program could have been written:
+
+@example
+/12/ @{ print $0 @} ; /21/ @{ print $0 @}
+@end example
+
+@noindent
+@strong{Note:} the requirement that rules on the same line must be
+separated with a semicolon was not in the original @code{awk}
+language; it was added for consistency with the treatment of statements
+within an action.
+
+@node Other Features, When, Statements/Lines, Getting Started
+@section Other Features of @code{awk}
+
+The @code{awk} language provides a number of predefined, or built-in variables, which
+your programs can use to get information from @code{awk}. There are other
+variables your program can set to control how @code{awk} processes your
+data.
+
+In addition, @code{awk} provides a number of built-in functions for doing
+common computational and string related operations.
+
+As we develop our presentation of the @code{awk} language, we introduce
+most of the variables and many of the functions. They are defined
+systematically in @ref{Built-in Variables}, and
+@ref{Built-in, ,Built-in Functions}.
+
+@node When, , Other Features, Getting Started
+@section When to Use @code{awk}
+
+@cindex when to use @code{awk}
+@cindex applications of @code{awk}
+You might wonder how @code{awk} might be useful for you. Using
+utility programs, advanced patterns, field separators, arithmetic
+statements, and other selection criteria, you can produce much more
+complex output. The @code{awk} language is very useful for producing
+reports from large amounts of raw data, such as summarizing information
+from the output of other utility programs like @code{ls}.
+(@xref{More Complex, ,A More Complex Example}.)
+
+Programs written with @code{awk} are usually much smaller than they would
+be in other languages. This makes @code{awk} programs easy to compose and
+use. Often, @code{awk} programs can be quickly composed at your terminal,
+used once, and thrown away. Since @code{awk} programs are interpreted, you
+can avoid the (usually lengthy) compilation part of the typical
+edit-compile-test-debug cycle of software development.
+
+Complex programs have been written in @code{awk}, including a complete
+retargetable assembler for eight-bit microprocessors (@pxref{Glossary}, for
+more information) and a microcode assembler for a special purpose Prolog
+computer. However, @code{awk}'s capabilities are strained by tasks of
+such complexity.
+
+If you find yourself writing @code{awk} scripts of more than, say, a few
+hundred lines, you might consider using a different programming
+language. Emacs Lisp is a good choice if you need sophisticated string
+or pattern matching capabilities. The shell is also good at string and
+pattern matching; in addition, it allows powerful use of the system
+utilities. More conventional languages, such as C, C++, and Lisp, offer
+better facilities for system programming and for managing the complexity
+of large programs. Programs in these languages may require more lines
+of source code than the equivalent @code{awk} programs, but they are
+easier to maintain and usually run more efficiently.
+
+@node One-liners, Regexp, Getting Started, Top
+@chapter Useful One Line Programs
+
+@cindex one-liners
+Many useful @code{awk} programs are short, just a line or two. Here is a
+collection of useful, short programs to get you started. Some of these
+programs contain constructs that haven't been covered yet. The description
+of the program will give you a good idea of what is going on, but please
+read the rest of the @value{DOCUMENT} to become an @code{awk} expert!
+
+Most of the examples use a data file named @file{data}. This is just a
+placeholder; if you were to use these programs yourself, you would substitute
+your own file names for @file{data}.
+
+@ifinfo
+Since you are reading this in Info, each line of the example code is
+enclosed in quotes, to represent text that you would type literally.
+The examples themselves represent shell commands that use single quotes
+to keep the shell from interpreting the contents of the program.
+When reading the examples, focus on the text between the open and close
+quotes.
+@end ifinfo
+
+@table @code
+@item awk '@{ if (length($0) > max) max = length($0) @}
+@itemx @ @ @ @ @ END @{ print max @}' data
+This program prints the length of the longest input line.
+
+@item awk 'length($0) > 80' data
+This program prints every line that is longer than 80 characters. The sole
+rule has a relational expression as its pattern, and has no action (so the
+default action, printing the record, is used).
+
+@item expand@ data@ |@ awk@ '@{ if (x < length()) x = length() @}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "maximum line length is " x @}'
+This program prints the length of the longest line in @file{data}. The input
+is processed by the @code{expand} program to change tabs into spaces,
+so the widths compared are actually the right-margin columns.
+
+@item awk 'NF > 0' data
+This program prints every line that has at least one field. This is an
+easy way to delete blank lines from a file (or rather, to create a new
+file similar to the old file but from which the blank lines have been
+deleted).
+
+@c Karl Berry points out that new users probably don't want to see
+@c multiple ways to do things, just the `best' way. He's probably
+@c right. At some point it might be worth adding something about there
+@c often being multiple ways to do things in awk, but for now we'll
+@c just take this one out.
+@ignore
+@item awk '@{ if (NF > 0) print @}' data
+This program also prints every line that has at least one field. Here we
+allow the rule to match every line, and then decide in the action whether
+to print.
+@end ignore
+
+@item awk@ 'BEGIN@ @{@ for (i = 1; i <= 7; i++)
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ print int(101 * rand()) @}'
+This program prints seven random numbers from zero to 100, inclusive.
+
+@item ls -lg @var{files} | awk '@{ x += $5 @} ; END @{ print "total bytes: " x @}'
+This program prints the total number of bytes used by @var{files}.
+
+@item ls -lg @var{files} | awk '@{ x += $5 @}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "total K-bytes: " (x + 1023)/1024 @}'
+This program prints the total number of kilobytes used by @var{files}.
+
+@item awk -F: '@{ print $1 @}' /etc/passwd | sort
+This program prints a sorted list of the login names of all users.
+
+@item awk 'END @{ print NR @}' data
+This program counts lines in a file.
+
+@item awk 'NR % 2 == 0' data
+This program prints the even numbered lines in the data file.
+If you were to use the expression @samp{NR % 2 == 1} instead,
+it would print the odd numbered lines.
+@end table
+
+@node Regexp, Reading Files, One-liners, Top
+@chapter Regular Expressions
+@cindex pattern, regular expressions
+@cindex regexp
+@cindex regular expression
+@cindex regular expressions as patterns
+
+A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a
+set of strings.
+Because regular expressions are such a fundamental part of @code{awk}
+programming, their format and use deserve a separate chapter.
+
+A regular expression enclosed in slashes (@samp{/})
+is an @code{awk} pattern that matches every input record whose text
+belongs to that set.
+
+The simplest regular expression is a sequence of letters, numbers, or
+both. Such a regexp matches any string that contains that sequence.
+Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
+Therefore, the pattern @code{/foo/} matches any input record containing
+the three characters @samp{foo}, @emph{anywhere} in the record. Other
+kinds of regexps let you specify more complicated classes of strings.
+
+@iftex
+Initially, the examples will be simple. As we explain more about how
+regular expressions work, we will present more complicated examples.
+@end iftex
+
+@menu
+* Regexp Usage:: How to Use Regular Expressions.
+* Escape Sequences:: How to write non-printing characters.
+* Regexp Operators:: Regular Expression Operators.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Leftmost Longest:: How much text matches.
+* Computed Regexps:: Using Dynamic Regexps.
+@end menu
+
+@node Regexp Usage, Escape Sequences, Regexp, Regexp
+@section How to Use Regular Expressions
+
+A regular expression can be used as a pattern by enclosing it in
+slashes. Then the regular expression is tested against the
+entire text of each record. (Normally, it only needs
+to match some part of the text in order to succeed.) For example, this
+prints the second field of each record that contains the three
+characters @samp{foo} anywhere in it:
+
+@example
+@group
+$ awk '/foo/ @{ print $2 @}' BBS-list
+@print{} 555-1234
+@print{} 555-6699
+@print{} 555-6480
+@print{} 555-2127
+@end group
+@end example
+
+@cindex regexp matching operators
+@cindex string-matching operators
+@cindex operators, string-matching
+@cindex operators, regexp matching
+@cindex regexp match/non-match operators
+@cindex @code{~} operator
+@cindex @code{!~} operator
+Regular expressions can also be used in matching expressions. These
+expressions allow you to specify the string to match against; it need
+not be the entire current input record. The two operators, @samp{~}
+and @samp{!~}, perform regular expression comparisons. Expressions
+using these operators can be used as patterns or in @code{if},
+@code{while}, @code{for}, and @code{do} statements.
+@ifinfo
+@c adding this xref in TeX screws up the formatting too much
+(@xref{Statements, ,Control Statements in Actions}.)
+@end ifinfo
+
+@table @code
+@item @var{exp} ~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a string)
+is matched by @var{regexp}. The following example matches, or selects,
+all input records with the upper-case letter @samp{J} somewhere in the
+first field:
+
+@example
+@group
+$ awk '$1 ~ /J/' inventory-shipped
+@print{} Jan 13 25 15 115
+@print{} Jun 31 42 75 492
+@print{} Jul 24 34 67 436
+@print{} Jan 21 36 64 620
+@end group
+@end example
+
+So does this:
+
+@example
+awk '@{ if ($1 ~ /J/) print @}' inventory-shipped
+@end example
+
+@item @var{exp} !~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a character string)
+is @emph{not} matched by @var{regexp}. The following example matches,
+or selects, all input records whose first field @emph{does not} contain
+the upper-case letter @samp{J}:
+
+@example
+@group
+$ awk '$1 !~ /J/' inventory-shipped
+@print{} Feb 15 32 24 226
+@print{} Mar 15 24 34 228
+@print{} Apr 31 52 63 420
+@print{} May 16 34 29 208
+@dots{}
+@end group
+@end example
+@end table
+
+@cindex regexp constant
+When a regexp is written enclosed in slashes, like @code{/foo/}, we call it
+a @dfn{regexp constant}, much like @code{5.27} is a numeric constant, and
+@code{"foo"} is a string constant.
+
+@node Escape Sequences, Regexp Operators, Regexp Usage, Regexp
+@section Escape Sequences
+
+@cindex escape sequence notation
+Some characters cannot be included literally in string constants
+(@code{"foo"}) or regexp constants (@code{/foo/}). You represent them
+instead with @dfn{escape sequences}, which are character sequences
+beginning with a backslash (@samp{\}).
+
+One use of an escape sequence is to include a double-quote character in
+a string constant. Since a plain double-quote would end the string, you
+must use @samp{\"} to represent an actual double-quote character as a
+part of the string. For example:
+
+@example
+$ awk 'BEGIN @{ print "He said \"hi!\" to her." @}'
+@print{} He said "hi!" to her.
+@end example
+
+The backslash character itself is another character that cannot be
+included normally; you write @samp{\\} to put one backslash in the
+string or regexp. Thus, the string whose contents are the two characters
+@samp{"} and @samp{\} must be written @code{"\"\\"}.
+
+Another use of backslash is to represent unprintable characters
+such as tab or newline. While there is nothing to stop you from entering most
+unprintable characters directly in a string constant or regexp constant,
+they may look ugly.
+
+Here is a table of all the escape sequences used in @code{awk}, and
+what they represent. Unless noted otherwise, all of these escape
+sequences apply to both string constants and regexp constants.
+
+@c @cartouche
+@table @code
+@item \\
+A literal backslash, @samp{\}.
+
+@cindex @code{awk} language, V.4 version
+@item \a
+The ``alert'' character, @kbd{Control-g}, ASCII code 7 (BEL).
+
+@item \b
+Backspace, @kbd{Control-h}, ASCII code 8 (BS).
+
+@item \f
+Formfeed, @kbd{Control-l}, ASCII code 12 (FF).
+
+@item \n
+Newline, @kbd{Control-j}, ASCII code 10 (LF).
+
+@item \r
+Carriage return, @kbd{Control-m}, ASCII code 13 (CR).
+
+@item \t
+Horizontal tab, @kbd{Control-i}, ASCII code 9 (HT).
+
+@cindex @code{awk} language, V.4 version
+@item \v
+Vertical tab, @kbd{Control-k}, ASCII code 11 (VT).
+
+@item \@var{nnn}
+The octal value @var{nnn}, where @var{nnn} are one to three digits
+between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
+(escape) character is @samp{\033}.
+
+@cindex @code{awk} language, V.4 version
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item \x@var{hh}@dots{}
+The hexadecimal value @var{hh}, where @var{hh} are hexadecimal
+digits (@samp{0} through @samp{9} and either @samp{A} through @samp{F} or
+@samp{a} through @samp{f}). Like the same construct in ANSI C, the escape
+sequence continues until the first non-hexadecimal digit is seen. However,
+using more than two hexadecimal digits produces undefined results. (The
+@samp{\x} escape sequence is not allowed in POSIX @code{awk}.)
+
+@item \/
+A literal slash (necessary for regexp constants only).
+You use this when you wish to write a regexp
+constant that contains a slash. Since the regexp is delimited by
+slashes, you need to escape the slash that is part of the pattern,
+in order to tell @code{awk} to keep processing the rest of the regexp.
+
+@item \"
+A literal double-quote (necessary for string constants only).
+You use this when you wish to write a string
+constant that contains a double-quote. Since the string is delimited by
+double-quotes, you need to escape the quote that is part of the string,
+in order to tell @code{awk} to keep processing the rest of the string.
+@end table
+@c @end cartouche
+
+In @code{gawk}, there are additional two-character sequences that begin
+with backslash that have special meaning in regexps.
+@xref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}}.
+
+In a string constant,
+what happens if you place a backslash before something that is not one of
+the characters listed above? POSIX @code{awk} purposely leaves this case
+undefined. There are two choices.
+
+@itemize @bullet
+@item
+Strip the backslash out. This is what Unix @code{awk} and @code{gawk} both do.
+For example, @code{"a\qc"} is the same as @code{"aqc"}.
+
+@item
+Leave the backslash alone. Some other @code{awk} implementations do this.
+In such implementations, @code{"a\qc"} is the same as if you had typed
+@code{"a\\qc"}.
+@end itemize
+
+In a regexp, a backslash before any character that is not in the above table,
+and not listed in
+@ref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}},
+means that the next character should be taken literally, even if it would
+normally be a regexp operator. E.g., @code{/a\+b/} matches the three
+characters @samp{a+b}.
+
+@cindex portability issues
+For complete portability, do not use a backslash before any character not
+listed in the table above.
+
+Another interesting question arises. Suppose you use an octal or hexadecimal
+escape to represent a regexp metacharacter
+(@pxref{Regexp Operators, , Regular Expression Operators}).
+Does @code{awk} treat the character as a literal character, or as a regexp
+operator?
+
+@cindex dark corner
+It turns out that historically, such characters were taken literally (d.c.).
+However, the POSIX standard indicates that they should be treated
+as real metacharacters, and this is what @code{gawk} does.
+However, in compatibility mode (@pxref{Options, ,Command Line Options}),
+@code{gawk} treats the characters represented by octal and hexadecimal
+escape sequences literally when used in regexp constants. Thus, in
+compatibility mode, @code{/a\52b/} is equivalent to @code{/a\*b/}.
+
+To summarize:
+
+@enumerate 1
+@item
+The escape sequences in the table above are always processed first,
+for both string constants and regexp constants. This happens very early,
+as soon as @code{awk} reads your program.
+
+@item
+@code{gawk} processes both regexp constants and dynamic regexps
+(@pxref{Computed Regexps, ,Using Dynamic Regexps}),
+for the special operators listed in
+@ref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}}.
+
+@item
+A backslash before any other character means to treat that character
+literally.
+@end enumerate
+
+@node Regexp Operators, GNU Regexp Operators, Escape Sequences, Regexp
+@section Regular Expression Operators
+@cindex metacharacters
+@cindex regular expression metacharacters
+@cindex regexp operators
+
+You can combine regular expressions with the following characters,
+called @dfn{regular expression operators}, or @dfn{metacharacters}, to
+increase the power and versatility of regular expressions.
+
+The escape sequences described
+@iftex
+above
+@end iftex
+in @ref{Escape Sequences},
+are valid inside a regexp. They are introduced by a @samp{\}. They
+are recognized and converted into the corresponding real characters as
+the very first step in processing regexps.
+
+Here is a table of metacharacters. All characters that are not escape
+sequences and that are not listed in the table stand for themselves.
+
+@table @code
+@item \
+This is used to suppress the special meaning of a character when
+matching. For example:
+
+@example
+\$
+@end example
+
+@noindent
+matches the character @samp{$}.
+
+@c NEEDED
+@page
+@cindex anchors in regexps
+@cindex regexp, anchors
+@item ^
+This matches the beginning of a string. For example:
+
+@example
+^@@chapter
+@end example
+
+@noindent
+matches the @samp{@@chapter} at the beginning of a string, and can be used
+to identify chapter beginnings in Texinfo source files.
+The @samp{^} is known as an @dfn{anchor}, since it anchors the pattern to
+matching only at the beginning of the string.
+
+It is important to realize that @samp{^} does not match the beginning of
+a line embedded in a string. In this example the condition is not true:
+
+@example
+if ("line1\nLINE 2" ~ /^L/) @dots{}
+@end example
+
+@item $
+This is similar to @samp{^}, but it matches only at the end of a string.
+For example:
+
+@example
+p$
+@end example
+
+@noindent
+matches a record that ends with a @samp{p}. The @samp{$} is also an anchor,
+and also does not match the end of a line embedded in a string. In this
+example the condition is not true:
+
+@example
+if ("line1\nLINE 2" ~ /1$/) @dots{}
+@end example
+
+@item .
+The period, or dot, matches any single character,
+@emph{including} the newline character. For example:
+
+@example
+.P
+@end example
+
+@noindent
+matches any single character followed by a @samp{P} in a string. Using
+concatenation we can make a regular expression like @samp{U.A}, which
+matches any three-character sequence that begins with @samp{U} and ends
+with @samp{A}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+In strict POSIX mode (@pxref{Options, ,Command Line Options}),
+@samp{.} does not match the @sc{nul}
+character, which is a character with all bits equal to zero.
+Otherwise, @sc{nul} is just another character. Other versions of @code{awk}
+may not be able to match the @sc{nul} character.
+
+@ignore
+2e: Add stuff that character list is the POSIX terminology. In other
+ literature known as character set or character class.
+@end ignore
+
+@cindex character list
+@item [@dots{}]
+This is called a @dfn{character list}. It matches any @emph{one} of the
+characters that are enclosed in the square brackets. For example:
+
+@example
+[MVX]
+@end example
+
+@noindent
+matches any one of the characters @samp{M}, @samp{V}, or @samp{X} in a
+string.
+
+Ranges of characters are indicated by using a hyphen between the beginning
+and ending characters, and enclosing the whole thing in brackets. For
+example:
+
+@example
+[0-9]
+@end example
+
+@noindent
+matches any digit.
+Multiple ranges are allowed. E.g., the list @code{@w{[A-Za-z0-9]}} is a
+common way to express the idea of ``all alphanumeric characters.''
+
+To include one of the characters @samp{\}, @samp{]}, @samp{-} or @samp{^} in a
+character list, put a @samp{\} in front of it. For example:
+
+@example
+[d\]]
+@end example
+
+@noindent
+matches either @samp{d}, or @samp{]}.
+
+@cindex @code{egrep}
+This treatment of @samp{\} in character lists
+is compatible with other @code{awk}
+implementations, and is also mandated by POSIX.
+The regular expressions in @code{awk} are a superset
+of the POSIX specification for Extended Regular Expressions (EREs).
+POSIX EREs are based on the regular expressions accepted by the
+traditional @code{egrep} utility.
+
+@cindex character classes
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@dfn{Character classes} are a new feature introduced in the POSIX standard.
+A character class is a special notation for describing
+lists of characters that have a specific attribute, but where the
+actual characters themselves can vary from country to country and/or
+from character set to character set. For example, the notion of what
+is an alphabetic character differs in the USA and in France.
+
+A character class is only valid in a regexp @emph{inside} the
+brackets of a character list. Character classes consist of @samp{[:},
+a keyword denoting the class, and @samp{:]}. Here are the character
+classes defined by the POSIX standard.
+
+@table @code
+@item [:alnum:]
+Alphanumeric characters.
+
+@item [:alpha:]
+Alphabetic characters.
+
+@item [:blank:]
+Space and tab characters.
+
+@item [:cntrl:]
+Control characters.
+
+@item [:digit:]
+Numeric characters.
+
+@item [:graph:]
+Characters that are printable and are also visible.
+(A space is printable, but not visible, while an @samp{a} is both.)
+
+@item [:lower:]
+Lower-case alphabetic characters.
+
+@item [:print:]
+Printable characters (characters that are not control characters).
+
+@item [:punct:]
+Punctuation characters (characters that are not letters, digits,
+control characters, or space characters).
+
+@item [:space:]
+Space characters (such as space, tab, and formfeed, to name a few).
+
+@item [:upper:]
+Upper-case alphabetic characters.
+
+@item [:xdigit:]
+Characters that are hexadecimal digits.
+@end table
+
+For example, before the POSIX standard, to match alphanumeric
+characters, you had to write @code{/[A-Za-z0-9]/}. If your
+character set had other alphabetic characters in it, this would not
+match them. With the POSIX character classes, you can write
+@code{/[[:alnum:]]/}, and this will match @emph{all} the alphabetic
+and numeric characters in your character set.
+
+@cindex collating elements
+Two additional special sequences can appear in character lists.
+These apply to non-ASCII character sets, which can have single symbols
+(called @dfn{collating elements}) that are represented with more than one
+character, as well as several characters that are equivalent for
+@dfn{collating}, or sorting, purposes. (E.g., in French, a plain ``e''
+and a grave-accented ``@`e'' are equivalent.)
+
+@table @asis
+@cindex collating symbols
+@item Collating Symbols
+A @dfn{collating symbol} is a multi-character collating element enclosed in
+@samp{[.} and @samp{.]}. For example, if @samp{ch} is a collating element,
+then @code{[[.ch.]]} is a regexp that matches this collating element, while
+@code{[ch]} is a regexp that matches either @samp{c} or @samp{h}.
+
+@cindex equivalence classes
+@item Equivalence Classes
+An @dfn{equivalence class} is a locale-specific name for a list of
+characters that are equivalent. The name is enclosed in
+@samp{[=} and @samp{=]}.
+For example, the name @samp{e} might be used to represent all of
+``e,'' ``@`e,'' and ``@'e.'' In this case, @code{[[=e=]]} is a regexp
+that matches any of @samp{e}, @samp{@'e}, or @samp{@`e}.
+@end table
+
+These features are very valuable in non-English-speaking locales.
+
+@strong{Caution:} The library functions that @code{gawk} uses for regular
+expression matching currently only recognize POSIX character classes;
+they do not recognize collating symbols or equivalence classes.
+@c maybe one day ...
+
+@cindex complemented character list
+@cindex character list, complemented
+@item [^ @dots{}]
+This is a @dfn{complemented character list}. The first character after
+the @samp{[} @emph{must} be a @samp{^}. It matches any characters
+@emph{except} those in the square brackets. For example:
+
+@example
+[^0-9]
+@end example
+
+@noindent
+matches any character that is not a digit.
+
+@item |
+This is the @dfn{alternation operator}, and it is used to specify
+alternatives. For example:
+
+@example
+^P|[0-9]
+@end example
+
+@noindent
+matches any string that matches either @samp{^P} or @samp{[0-9]}. This
+means it matches any string that starts with @samp{P} or contains a digit.
+
+The alternation applies to the largest possible regexps on either side.
+In other words, @samp{|} has the lowest precedence of all the regular
+expression operators.
+
+@item (@dots{})
+Parentheses are used for grouping in regular expressions as in
+arithmetic. They can be used to concatenate regular expressions
+containing the alternation operator, @samp{|}. For example,
+@samp{@@(samp|code)\@{[^@}]+\@}} matches both @samp{@@code@{foo@}} and
+@samp{@@samp@{bar@}}. (These are Texinfo formatting control sequences.)
+
+@item *
+This symbol means that the preceding regular expression is to be
+repeated as many times as necessary to find a match. For example:
+
+@example
+ph*
+@end example
+
+@noindent
+applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
+of one @samp{p} followed by any number of @samp{h}s. This will also match
+just @samp{p} if no @samp{h}s are present.
+
+The @samp{*} repeats the @emph{smallest} possible preceding expression.
+(Use parentheses if you wish to repeat a larger expression.) It finds
+as many repetitions as possible. For example:
+
+@example
+awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample
+@end example
+
+@noindent
+prints every record in @file{sample} containing a string of the form
+@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.
+Notice the escaping of the parentheses by preceding them
+with backslashes.
+
+@item +
+This symbol is similar to @samp{*}, but the preceding expression must be
+matched at least once. This means that:
+
+@example
+wh+y
+@end example
+
+@noindent
+would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas
+@samp{wh*y} would match all three of these strings. This is a simpler
+way of writing the last @samp{*} example:
+
+@example
+awk '/\(c[ad]+r x\)/ @{ print @}' sample
+@end example
+
+@item ?
+This symbol is similar to @samp{*}, but the preceding expression can be
+matched either once or not at all. For example:
+
+@example
+fe?d
+@end example
+
+@noindent
+will match @samp{fed} and @samp{fd}, but nothing else.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@cindex interval expressions
+@item @{@var{n}@}
+@itemx @{@var{n},@}
+@itemx @{@var{n},@var{m}@}
+One or two numbers inside braces denote an @dfn{interval expression}.
+If there is one number in the braces, the preceding regexp is repeated
+@var{n} times.
+If there are two numbers separated by a comma, the preceding regexp is
+repeated @var{n} to @var{m} times.
+If there is one number followed by a comma, then the preceding regexp
+is repeated at least @var{n} times.
+
+@table @code
+@item wh@{3@}y
+matches @samp{whhhy} but not @samp{why} or @samp{whhhhy}.
+
+@item wh@{3,5@}y
+matches @samp{whhhy} or @samp{whhhhy} or @samp{whhhhhy}, only.
+
+@item wh@{2,@}y
+matches @samp{whhy} or @samp{whhhy}, and so on.
+@end table
+
+Interval expressions were not traditionally available in @code{awk}.
+As part of the POSIX standard they were added, to make @code{awk}
+and @code{egrep} consistent with each other.
+
+However, since old programs may use @samp{@{} and @samp{@}} in regexp
+constants, by default @code{gawk} does @emph{not} match interval expressions
+in regexps. If either @samp{--posix} or @samp{--re-interval} is specified
+(@pxref{Options, , Command Line Options}), then interval expressions
+are allowed in regexps.
+@end table
+
+@cindex precedence, regexp operators
+@cindex regexp operators, precedence of
+In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators,
+as well as the braces @samp{@{} and @samp{@}},
+have
+the highest precedence, followed by concatenation, and finally by @samp{|}.
+As in arithmetic, parentheses can change how operators are grouped.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+character classes and interval expressions are not available in
+regular expressions.
+
+The next
+@ifinfo
+node
+@end ifinfo
+@iftex
+section
+@end iftex
+discusses the GNU-specific regexp operators, and provides
+more detail concerning how command line options affect the way @code{gawk}
+interprets the characters in regular expressions.
+
+@node GNU Regexp Operators, Case-sensitivity, Regexp Operators, Regexp
+@section Additional Regexp Operators Only in @code{gawk}
+
+@c This section adapted from the regex-0.12 manual
+
+@cindex regexp operators, GNU specific
+GNU software that deals with regular expressions provides a number of
+additional regexp operators. These operators are described in this
+section, and are specific to @code{gawk}; they are not available in other
+@code{awk} implementations.
+
+@cindex word, regexp definition of
+Most of the additional operators are for dealing with word matching.
+For our purposes, a @dfn{word} is a sequence of one or more letters, digits,
+or underscores (@samp{_}).
+
+@table @code
+@cindex @code{\w} regexp operator
+@item \w
+This operator matches any word-constituent character, i.e.@: any
+letter, digit, or underscore. Think of it as a short-hand for
+@c @w{@code{[A-Za-z0-9_]}} or
+@w{@code{[[:alnum:]_]}}.
+
+@cindex @code{\W} regexp operator
+@item \W
+This operator matches any character that is not word-constituent.
+Think of it as a short-hand for
+@c @w{@code{[^A-Za-z0-9_]}} or
+@w{@code{[^[:alnum:]_]}}.
+
+@cindex @code{\<} regexp operator
+@item \<
+This operator matches the empty string at the beginning of a word.
+For example, @code{/\<away/} matches @samp{away}, but not
+@samp{stowaway}.
+
+@cindex @code{\>} regexp operator
+@item \>
+This operator matches the empty string at the end of a word.
+For example, @code{/stow\>/} matches @samp{stow}, but not @samp{stowaway}.
+
+@cindex @code{\y} regexp operator
+@cindex word boundaries, matching
+@item \y
+This operator matches the empty string at either the beginning or the
+end of a word (the word boundar@strong{y}). For example, @samp{\yballs?\y}
+matches either @samp{ball} or @samp{balls} as a separate word.
+
+@cindex @code{\B} regexp operator
+@item \B
+This operator matches the empty string within a word. In other words,
+@samp{\B} matches the empty string that occurs between two
+word-constituent characters. For example,
+@code{/\Brat\B/} matches @samp{crate}, but it does not match @samp{dirty rat}.
+@samp{\B} is essentially the opposite of @samp{\y}.
+@end table
+
+There are two other operators that work on buffers. In Emacs, a
+@dfn{buffer} is, naturally, an Emacs buffer. For other programs, the
+regexp library routines that @code{gawk} uses consider the entire
+string to be matched as the buffer.
+
+For @code{awk}, since @samp{^} and @samp{$} always work in terms
+of the beginning and end of strings, these operators don't add any
+new capabilities. They are provided for compatibility with other GNU
+software.
+
+@cindex buffer matching operators
+@table @code
+@cindex @code{\`} regexp operator
+@item \`
+This operator matches the empty string at the
+beginning of the buffer.
+
+@cindex @code{\'} regexp operator
+@item \'
+This operator matches the empty string at the
+end of the buffer.
+@end table
+
+In other GNU software, the word boundary operator is @samp{\b}. However,
+that conflicts with the @code{awk} language's definition of @samp{\b}
+as backspace, so @code{gawk} uses a different letter.
+
+An alternative method would have been to require two backslashes in the
+GNU operators, but this was deemed to be too confusing, and the current
+method of using @samp{\y} for the GNU @samp{\b} appears to be the
+lesser of two evils.
+
+@c NOTE!!! Keep this in sync with the same table in the summary appendix!
+@cindex regexp, effect of command line options
+The various command line options
+(@pxref{Options, ,Command Line Options})
+control how @code{gawk} interprets characters in regexps.
+
+@table @asis
+@item No options
+In the default case, @code{gawk} provides all the facilities of
+POSIX regexps and the GNU regexp operators described
+@iftex
+above.
+@end iftex
+@ifinfo
+in @ref{Regexp Operators, ,Regular Expression Operators}.
+@end ifinfo
+However, interval expressions are not supported.
+
+@item @code{--posix}
+Only POSIX regexps are supported; the GNU operators are not special
+(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions
+are allowed.
+
+@item @code{--traditional}
+Traditional Unix @code{awk} regexps are matched. The GNU operators
+are not special, interval expressions are not available, and neither
+are the POSIX character classes (@code{[[:alnum:]]} and so on).
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+
+@item @code{--re-interval}
+Allow interval expressions in regexps, even if @samp{--traditional}
+has been provided.
+@end table
+
+@node Case-sensitivity, Leftmost Longest, GNU Regexp Operators, Regexp
+@section Case-sensitivity in Matching
+
+@cindex case sensitivity
+@cindex ignoring case
+Case is normally significant in regular expressions, both when matching
+ordinary characters (i.e.@: not metacharacters), and inside character
+sets. Thus a @samp{w} in a regular expression matches only a lower-case
+@samp{w} and not an upper-case @samp{W}.
+
+The simplest way to do a case-independent match is to use a character
+list: @samp{[Ww]}. However, this can be cumbersome if you need to use it
+often; and it can make the regular expressions harder to
+read. There are two alternatives that you might prefer.
+
+One way to do a case-insensitive match at a particular point in the
+program is to convert the data to a single case, using the
+@code{tolower} or @code{toupper} built-in string functions (which we
+haven't discussed yet;
+@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+For example:
+
+@example
+tolower($1) ~ /foo/ @{ @dots{} @}
+@end example
+
+@noindent
+converts the first field to lower-case before matching against it.
+This will work in any POSIX-compliant implementation of @code{awk}.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex @code{~} operator
+@cindex @code{!~} operator
+@vindex IGNORECASE
+Another method, specific to @code{gawk}, is to set the variable
+@code{IGNORECASE} to a non-zero value (@pxref{Built-in Variables}).
+When @code{IGNORECASE} is not zero, @emph{all} regexp and string
+operations ignore case. Changing the value of
+@code{IGNORECASE} dynamically controls the case sensitivity of your
+program as it runs. Case is significant by default because
+@code{IGNORECASE} (like most variables) is initialized to zero.
+
+@example
+@group
+x = "aB"
+if (x ~ /ab/) @dots{} # this test will fail
+@end group
+
+@group
+IGNORECASE = 1
+if (x ~ /ab/) @dots{} # now it will succeed
+@end group
+@end example
+
+In general, you cannot use @code{IGNORECASE} to make certain rules
+case-insensitive and other rules case-sensitive, because there is no way
+to set @code{IGNORECASE} just for the pattern of a particular rule.
+@ignore
+This isn't quite true. Consider:
+
+ IGNORECASE=1 && /foObAr/ { .... }
+ IGNORECASE=0 || /foobar/ { .... }
+
+But that's pretty bad style and I don't want to get into it at this
+late date.
+@end ignore
+To do this, you must use character lists or @code{tolower}. However, one
+thing you can do only with @code{IGNORECASE} is turn case-sensitivity on
+or off dynamically for all the rules at once.
+
+@code{IGNORECASE} can be set on the command line, or in a @code{BEGIN} rule
+(@pxref{Other Arguments, ,Other Command Line Arguments}; also
+@pxref{Using BEGIN/END, ,Startup and Cleanup Actions}).
+Setting @code{IGNORECASE} from the command line is a way to make
+a program case-insensitive without having to edit it.
+
+Prior to version 3.0 of @code{gawk}, the value of @code{IGNORECASE}
+only affected regexp operations. It did not affect string comparison
+with @samp{==}, @samp{!=}, and so on.
+Beginning with version 3.0, both regexp and string comparison
+operations are affected by @code{IGNORECASE}.
+
+@cindex ISO 8859-1
+@cindex ISO Latin-1
+Beginning with version 3.0 of @code{gawk}, the equivalences between upper-case
+and lower-case characters are based on the ISO-8859-1 (ISO Latin-1)
+character set. This character set is a superset of the traditional 128
+ASCII characters, and it also provides a number of characters suitable
+for use with European languages.
+@ignore
+A pure ASCII character set can be used instead if @code{gawk} is compiled
+with @samp{-DUSE_PURE_ASCII}.
+@end ignore
+
+The value of @code{IGNORECASE} has no effect if @code{gawk} is in
+compatibility mode (@pxref{Options, ,Command Line Options}).
+Case is always significant in compatibility mode.
+
+@node Leftmost Longest, Computed Regexps, Case-sensitivity, Regexp
+@section How Much Text Matches?
+
+@cindex leftmost longest match
+@cindex matching, leftmost longest
+Consider the following example:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub} function (which we haven't discussed yet,
+@pxref{String Functions, ,Built-in Functions for String Manipulation})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters. What will the output be?
+In other words, how many is ``one or more''---will @code{awk} match two,
+three, or all four @samp{a} characters?
+
+The answer is, @code{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, in this example, all four @samp{a} characters are
+replaced with @samp{<A>}.
+
+@example
+$ echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+regexp-based field and record splitting, and
+text matching and substitutions with the @code{match}, @code{sub}, @code{gsub},
+and @code{gensub} functions, it is very important.
+@ifinfo
+@xref{String Functions, ,Built-in Functions for String Manipulation},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records, ,How Input is Split into Records},
+and also @pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@node Computed Regexps, , Leftmost Longest, Regexp
+@section Using Dynamic Regexps
+
+@cindex computed regular expressions
+@cindex regular expressions, computed
+@cindex dynamic regular expressions
+@cindex regexp, dynamic
+@cindex @code{~} operator
+@cindex @code{!~} operator
+The right hand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e.@: a string of characters between slashes). It may
+be any expression. The expression is evaluated, and converted if
+necessary to a string; the contents of the string are used as the
+regexp. A regexp that is computed in this way is called a @dfn{dynamic
+regexp}. For example:
+
+@example
+BEGIN @{ identifier_regexp = "[A-Za-z_][A-Za-z_0-9]*" @}
+$0 ~ identifier_regexp @{ print @}
+@end example
+
+@noindent
+sets @code{identifier_regexp} to a regexp that describes @code{awk}
+variable names, and tests if the input record matches this regexp.
+
+@strong{Caution:} When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes, and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is in essence scanned @emph{twice}; the first time when
+@code{awk} reads your program, and the second time when it goes to
+match the string on the left-hand side of the operator with the pattern
+on the right. This is true of any string valued expression (such as
+@code{identifier_regexp} above), not just string constants.
+
+@cindex regexp constants, difference between slashes and quotes
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you would have to type @code{"\\*"}. The first backslash escapes the
+second one, so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons.
+
+@enumerate 1
+@item
+String constants are more complicated to write, and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is also more efficient to use regexp constants: @code{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@code{awk} must first convert the string into this internal form, and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better style; it shows clearly that you
+intend a regexp match.
+@end enumerate
+
+@node Reading Files, Printing, Regexp, Top
+@chapter Reading Input Files
+
+@cindex reading files
+@cindex input
+@cindex standard input
+@vindex FILENAME
+In the typical @code{awk} program, all input is read either from the
+standard input (by default the keyboard, but often a pipe from another
+command) or from files whose names you specify on the @code{awk} command
+line. If you specify input files, @code{awk} reads them in order, reading
+all the data from one before going on to the next. The name of the current
+input file can be found in the built-in variable @code{FILENAME}
+(@pxref{Built-in Variables}).
+
+The input is read in units called @dfn{records}, and processed by the
+rules of your program one record at a time.
+By default, each record is one line. Each
+record is automatically split into chunks called @dfn{fields}.
+This makes it more convenient for programs to work on the parts of a record.
+
+On rare occasions you will need to use the @code{getline} command.
+The @code{getline} command is valuable, both because it
+can do explicit input from any number of files, and because the files
+used with it do not have to be named on the @code{awk} command line
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+
+@menu
+* Records:: Controlling how data is split into records.
+* Fields:: An introduction to fields.
+* Non-Constant Fields:: Non-constant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change it.
+* Constant Size:: Reading constant width data.
+* Multiple Line:: Reading multi-line records.
+* Getline:: Reading files under explicit program control
+ using the @code{getline} function.
+@end menu
+
+@node Records, Fields, Reading Files, Reading Files
+@section How Input is Split into Records
+
+@cindex record separator, @code{RS}
+@cindex changing the record separator
+@cindex record, definition of
+@vindex RS
+The @code{awk} utility divides the input for your @code{awk}
+program into records and fields.
+Records are separated by a character called the @dfn{record separator}.
+By default, the record separator is the newline character.
+This is why records are, by default, single lines.
+You can use a different character for the record separator by
+assigning the character to the built-in variable @code{RS}.
+
+You can change the value of @code{RS} in the @code{awk} program,
+like any other variable, with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+The new record-separator character should be enclosed in quotation marks,
+which indicate
+a string constant. Often the right time to do this is at the beginning
+of execution, before any input has been processed, so that the very
+first record will be read with the proper separator. To do this, use
+the special @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). For
+example:
+
+@example
+awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+changes the value of @code{RS} to @code{"/"}, before reading any input.
+This is a string whose first character is a slash; as a result, records
+are separated by slashes. Then the input file is read, and the second
+rule in the @code{awk} program (the action with no pattern) prints each
+record. Since each @code{print} statement adds a newline at the end of
+its output, the effect of this @code{awk} program is to copy the input
+with each slash changed to a newline. Here are the results of running
+the program on @file{BBS-list}:
+
+@example
+@group
+$ awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
+@print{} aardvark 555-5553 1200
+@print{} 300 B
+@print{} alpo-net 555-3412 2400
+@print{} 1200
+@print{} 300 A
+@print{} barfly 555-7685 1200
+@print{} 300 A
+@print{} bites 555-1675 2400
+@print{} 1200
+@print{} 300 A
+@print{} camelot 555-0542 300 C
+@print{} core 555-2912 1200
+@print{} 300 C
+@print{} fooey 555-1234 2400
+@print{} 1200
+@print{} 300 B
+@print{} foot 555-6699 1200
+@print{} 300 B
+@print{} macfoo 555-6480 1200
+@print{} 300 A
+@print{} sdace 555-3430 2400
+@print{} 1200
+@print{} 300 A
+@print{} sabafoo 555-2127 1200
+@print{} 300 C
+@print{}
+@end group
+@end example
+
+@noindent
+Note that the entry for the @samp{camelot} BBS is not split.
+In the original data file
+(@pxref{Sample Data Files, , Data Files for the Examples}),
+the line looks like this:
+
+@example
+camelot 555-0542 300 C
+@end example
+
+@noindent
+It only has one baud rate; there are no slashes in the record.
+
+Another way to change the record separator is on the command line,
+using the variable-assignment feature
+(@pxref{Other Arguments, ,Other Command Line Arguments}).
+
+@example
+awk '@{ print $0 @}' RS="/" BBS-list
+@end example
+
+@noindent
+This sets @code{RS} to @samp{/} before processing @file{BBS-list}.
+
+Using an unusual character such as @samp{/} for the record separator
+produces correct behavior in the vast majority of cases. However,
+the following (extreme) pipeline prints a surprising @samp{1}. There
+is one field, consisting of a newline. The value of the built-in
+variable @code{NF} is the number of fields in the current record.
+
+@example
+$ echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}'
+@print{} 1
+@end example
+
+@cindex dark corner
+@noindent
+Reaching the end of an input file terminates the current input record,
+even if the last character in the file is not the character in @code{RS}
+(d.c.).
+
+@cindex empty string
+The empty string, @code{""} (a string of no characters), has a special meaning
+as the value of @code{RS}: it means that records are separated
+by one or more blank lines, and nothing else.
+@xref{Multiple Line, ,Multiple-Line Records}, for more details.
+
+If you change the value of @code{RS} in the middle of an @code{awk} run,
+the new value is used to delimit subsequent records, but the record
+currently being processed (and records already processed) are not
+affected.
+
+@vindex RT
+@cindex record terminator, @code{RT}
+@cindex terminator, record
+@cindex differences between @code{gawk} and @code{awk}
+After the end of the record has been determined, @code{gawk}
+sets the variable @code{RT} to the text in the input that matched
+@code{RS}.
+
+@cindex regular expressions as record separators
+The value of @code{RS} is in fact not limited to a one-character
+string. It can be any regular expression
+(@pxref{Regexp, ,Regular Expressions}).
+In general, each record
+ends at the next string that matches the regular expression; the next
+record starts at the end of the matching string. This general rule is
+actually at work in the usual case, where @code{RS} contains just a
+newline: a record ends at the beginning of the next matching string (the
+next newline in the input) and the following record starts just after
+the end of this string (at the first character of the following line).
+The newline, since it matches @code{RS}, is not part of either record.
+
+When @code{RS} is a single character, @code{RT} will
+contain the same single character. However, when @code{RS} is a
+regular expression, then @code{RT} becomes more useful; it contains
+the actual input text that matched the regular expression.
+
+The following example illustrates both of these features.
+It sets @code{RS} equal to a regular expression that
+matches either a newline, or a series of one or more upper-case letters
+with optional leading and/or trailing white space
+(@pxref{Regexp, , Regular Expressions}).
+
+@example
+$ echo record 1 AAAA record 2 BBBB record 3 |
+> gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @}
+> @{ print "Record =", $0, "and RT =", RT @}'
+@print{} Record = record 1 and RT = AAAA
+@print{} Record = record 2 and RT = BBBB
+@print{} Record = record 3 and RT =
+@print{}
+@end example
+
+@noindent
+The final line of output has an extra blank line. This is because the
+value of @code{RT} is a newline, and then the @code{print} statement
+supplies its own terminating newline.
+
+@xref{Simple Sed, ,A Simple Stream Editor}, for a more useful example
+of @code{RS} as a regexp and @code{RT}.
+
+@cindex differences between @code{gawk} and @code{awk}
+The use of @code{RS} as a regular expression and the @code{RT}
+variable are @code{gawk} extensions; they are not available in
+compatibility mode
+(@pxref{Options, ,Command Line Options}).
+In compatibility mode, only the first character of the value of
+@code{RS} is used to determine the end of the record.
+
+@cindex number of records, @code{NR}, @code{FNR}
+@vindex NR
+@vindex FNR
+The @code{awk} utility keeps track of the number of records that have
+been read so far from the current input file. This value is stored in a
+built-in variable called @code{FNR}. It is reset to zero when a new
+file is started. Another built-in variable, @code{NR}, is the total
+number of input records read so far from all data files. It starts at zero
+but is never automatically reset to zero.
+
+@node Fields, Non-Constant Fields, Records, Reading Files
+@section Examining Fields
+
+@cindex examining fields
+@cindex fields
+@cindex accessing fields
+When @code{awk} reads an input record, the record is
+automatically separated or @dfn{parsed} by the interpreter into chunks
+called @dfn{fields}. By default, fields are separated by whitespace,
+like words in a line.
+Whitespace in @code{awk} means any string of one or more spaces,
+tabs or newlines;@footnote{In POSIX @code{awk}, newlines are not
+considered whitespace for separating fields.} other characters such as
+formfeed, and so on, that are
+considered whitespace by other languages are @emph{not} considered
+whitespace by @code{awk}.
+
+The purpose of fields is to make it more convenient for you to refer to
+these pieces of the record. You don't have to use them---you can
+operate on the whole record if you wish---but fields are what make
+simple @code{awk} programs so powerful.
+
+@cindex @code{$} (field operator)
+@cindex field operator @code{$}
+To refer to a field in an @code{awk} program, you use a dollar-sign,
+@samp{$}, followed by the number of the field you want. Thus, @code{$1}
+refers to the first field, @code{$2} to the second, and so on. For
+example, suppose the following is a line of input:
+
+@example
+This seems like a pretty nice example.
+@end example
+
+@noindent
+Here the first field, or @code{$1}, is @samp{This}; the second field, or
+@code{$2}, is @samp{seems}; and so on. Note that the last field,
+@code{$7}, is @samp{example.}. Because there is no space between the
+@samp{e} and the @samp{.}, the period is considered part of the seventh
+field.
+
+@vindex NF
+@cindex number of fields, @code{NF}
+@code{NF} is a built-in variable whose value
+is the number of fields in the current record.
+@code{awk} updates the value of @code{NF} automatically, each time
+a record is read.
+
+No matter how many fields there are, the last field in a record can be
+represented by @code{$NF}. So, in the example above, @code{$NF} would
+be the same as @code{$7}, which is @samp{example.}. Why this works is
+explained below (@pxref{Non-Constant Fields, ,Non-constant Field Numbers}).
+If you try to reference a field beyond the last one, such as @code{$8}
+when the record has only seven fields, you get the empty string.
+@c the empty string acts like 0 in some contexts, but I don't want to
+@c get into that here....
+
+@code{$0}, which looks like a reference to the ``zeroth'' field, is
+a special case: it represents the whole input record. @code{$0} is
+used when you are not interested in fields.
+
+Here are some more examples:
+
+@example
+@group
+$ awk '$1 ~ /foo/ @{ print $0 @}' BBS-list
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+@noindent
+This example prints each record in the file @file{BBS-list} whose first
+field contains the string @samp{foo}. The operator @samp{~} is called a
+@dfn{matching operator}
+(@pxref{Regexp Usage, , How to Use Regular Expressions});
+it tests whether a string (here, the field @code{$1}) matches a given regular
+expression.
+
+By contrast, the following example
+looks for @samp{foo} in @emph{the entire record} and prints the first
+field and the last field for each input record containing a
+match.
+
+@example
+@group
+$ awk '/foo/ @{ print $1, $NF @}' BBS-list
+@print{} fooey B
+@print{} foot B
+@print{} macfoo A
+@print{} sabafoo C
+@end group
+@end example
+
+@node Non-Constant Fields, Changing Fields, Fields, Reading Files
+@section Non-constant Field Numbers
+
+The number of a field does not need to be a constant. Any expression in
+the @code{awk} language can be used after a @samp{$} to refer to a
+field. The value of the expression specifies the field number. If the
+value is a string, rather than a number, it is converted to a number.
+Consider this example:
+
+@example
+awk '@{ print $NR @}'
+@end example
+
+@noindent
+Recall that @code{NR} is the number of records read so far: one in the
+first record, two in the second, etc. So this example prints the first
+field of the first record, the second field of the second record, and so
+on. For the twentieth record, field number 20 is printed; most likely,
+the record has fewer than 20 fields, so this prints a blank line.
+
+Here is another example of using expressions as field numbers:
+
+@example
+awk '@{ print $(2*2) @}' BBS-list
+@end example
+
+@code{awk} must evaluate the expression @samp{(2*2)} and use
+its value as the number of the field to print. The @samp{*} sign
+represents multiplication, so the expression @samp{2*2} evaluates to four.
+The parentheses are used so that the multiplication is done before the
+@samp{$} operation; they are necessary whenever there is a binary
+operator in the field-number expression. This example, then, prints the
+hours of operation (the fourth field) for every line of the file
+@file{BBS-list}. (All of the @code{awk} operators are listed, in
+order of decreasing precedence, in
+@ref{Precedence, , Operator Precedence (How Operators Nest)}.)
+
+If the field number you compute is zero, you get the entire record.
+Thus, @code{$(2-2)} has the same value as @code{$0}. Negative field
+numbers are not allowed; trying to reference one will usually terminate
+your running @code{awk} program. (The POSIX standard does not define
+what happens when you reference a negative field number. @code{gawk}
+will notice this and terminate your program. Other @code{awk}
+implementations may behave differently.)
+
+As mentioned in @ref{Fields, ,Examining Fields},
+the number of fields in the current record is stored in the built-in
+variable @code{NF} (also @pxref{Built-in Variables}). The expression
+@code{$NF} is not a special feature: it is the direct consequence of
+evaluating @code{NF} and using its value as a field number.
+
+@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files
+@section Changing the Contents of a Field
+
+@cindex field, changing contents of
+@cindex changing contents of a field
+@cindex assignment to fields
+You can change the contents of a field as seen by @code{awk} within an
+@code{awk} program; this changes what @code{awk} perceives as the
+current input record. (The actual input is untouched; @code{awk} @emph{never}
+modifies the input file.)
+
+Consider this example and its output:
+
+@example
+@group
+$ awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped
+@print{} 13 3
+@print{} 15 5
+@print{} 15 5
+@dots{}
+@end group
+@end example
+
+@noindent
+The @samp{-} sign represents subtraction, so this program reassigns
+field three, @code{$3}, to be the value of field two minus ten,
+@samp{$2 - 10}. (@xref{Arithmetic Ops, ,Arithmetic Operators}.)
+Then field two, and the new value for field three, are printed.
+
+In order for this to work, the text in field @code{$2} must make sense
+as a number; the string of characters must be converted to a number in
+order for the computer to do arithmetic on it. The number resulting
+from the subtraction is converted back to a string of characters which
+then becomes field three.
+@xref{Conversion, ,Conversion of Strings and Numbers}.
+
+When you change the value of a field (as perceived by @code{awk}), the
+text of the input record is recalculated to contain the new field where
+the old one was. Therefore, @code{$0} changes to reflect the altered
+field. Thus, this program
+prints a copy of the input file, with 10 subtracted from the second
+field of each line.
+
+@example
+@group
+$ awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
+@print{} Jan 3 25 15 115
+@print{} Feb 5 32 24 226
+@print{} Mar 5 24 34 228
+@dots{}
+@end group
+@end example
+
+You can also assign contents to fields that are out of range. For
+example:
+
+@example
+$ awk '@{ $6 = ($5 + $4 + $3 + $2)
+> print $6 @}' inventory-shipped
+@print{} 168
+@print{} 297
+@print{} 301
+@dots{}
+@end example
+
+@noindent
+We've just created @code{$6}, whose value is the sum of fields
+@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign
+represents addition. For the file @file{inventory-shipped}, @code{$6}
+represents the total number of parcels shipped for a particular month.
+
+Creating a new field changes @code{awk}'s internal copy of the current
+input record---the value of @code{$0}. Thus, if you do @samp{print $0}
+after adding a field, the record printed includes the new field, with
+the appropriate number of field separators between it and the previously
+existing fields.
+
+This recomputation affects and is affected by
+@code{NF} (the number of fields; @pxref{Fields, ,Examining Fields}),
+and by a feature that has not been discussed yet,
+the @dfn{output field separator}, @code{OFS},
+which is used to separate the fields (@pxref{Output Separators}).
+For example, the value of @code{NF} is set to the number of the highest
+field you create.
+
+Note, however, that merely @emph{referencing} an out-of-range field
+does @emph{not} change the value of either @code{$0} or @code{NF}.
+Referencing an out-of-range field only produces an empty string. For
+example:
+
+@example
+if ($(NF+1) != "")
+ print "can't happen"
+else
+ print "everything is normal"
+@end example
+
+@noindent
+should print @samp{everything is normal}, because @code{NF+1} is certain
+to be out of range. (@xref{If Statement, ,The @code{if}-@code{else} Statement},
+for more information about @code{awk}'s @code{if-else} statements.
+@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions},
+for more information about the @samp{!=} operator.)
+
+It is important to note that making an assignment to an existing field
+will change the
+value of @code{$0}, but will not change the value of @code{NF},
+even when you assign the empty string to a field. For example:
+
+@example
+@group
+$ echo a b c d | awk '@{ OFS = ":"; $2 = ""
+> print $0; print NF @}'
+@print{} a::c:d
+@print{} 4
+@end group
+@end example
+
+@noindent
+The field is still there; it just has an empty value. You can tell
+because there are two colons in a row.
+
+This example shows what happens if you create a new field.
+
+@example
+$ echo a b c d | awk '@{ OFS = ":"; $2 = ""; $6 = "new"
+> print $0; print NF @}'
+@print{} a::c:d::new
+@print{} 6
+@end example
+
+@noindent
+The intervening field, @code{$5}, is created with an empty value
+(indicated by the second pair of adjacent colons),
+and @code{NF} is updated with the value six.
+
+Finally, decrementing @code{NF} will lose the values of the fields
+after the new value of @code{NF}, and @code{$0} will be recomputed.
+Here is an example:
+
+@example
+$ echo a b c d e f | gawk '@{ print "NF =", NF;
+> NF = 3; print $0 @}'
+@print{} NF = 6
+@print{} a b c
+@end example
+
+@node Field Separators, Constant Size, Changing Fields, Reading Files
+@section Specifying How Fields are Separated
+
+This section is rather long; it describes one of the most fundamental
+operations in @code{awk}.
+
+@menu
+* Basic Field Splitting:: How fields are split with single characters
+ or simple strings.
+* Regexp Field Splitting:: Using regexps as the field separator.
+* Single Character Fields:: Making each character a separate field.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
+* Field Splitting Summary:: Some final points and a summary table.
+@end menu
+
+@node Basic Field Splitting, Regexp Field Splitting, Field Separators, Field Separators
+@subsection The Basics of Field Separating
+@vindex FS
+@cindex fields, separating
+@cindex field separator, @code{FS}
+
+The @dfn{field separator}, which is either a single character or a regular
+expression, controls the way @code{awk} splits an input record into fields.
+@code{awk} scans the input record for character sequences that
+match the separator; the fields themselves are the text between the matches.
+
+In the examples below, we use the bullet symbol ``@bullet{}'' to represent
+spaces in the output.
+
+If the field separator is @samp{oo}, then the following line:
+
+@example
+moo goo gai pan
+@end example
+
+@noindent
+would be split into three fields: @samp{m}, @samp{@bullet{}g} and
+@samp{@bullet{}gai@bullet{}pan}.
+Note the leading spaces in the values of the second and third fields.
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+The field separator is represented by the built-in variable @code{FS}.
+Shell programmers take note! @code{awk} does @emph{not} use the name @code{IFS},
+which is used by the POSIX-compatible shells (such as the Bourne shell,
+@code{sh}, or the GNU Bourne-Again Shell, Bash).
+
+You can change the value of @code{FS} in the @code{awk} program with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+Often the right time to do this is at the beginning of execution,
+before any input has been processed, so that the very first record
+will be read with the proper separator. To do this, use the special
+@code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+For example, here we set the value of @code{FS} to the string
+@code{","}:
+
+@example
+awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
+@end example
+
+@noindent
+Given the input line,
+
+@example
+John Q. Smith, 29 Oak St., Walamazoo, MI 42139
+@end example
+
+@noindent
+this @code{awk} program extracts and prints the string
+@samp{@bullet{}29@bullet{}Oak@bullet{}St.}.
+
+@cindex field separator, choice of
+@cindex regular expressions as field separators
+Sometimes your input data will contain separator characters that don't
+separate fields the way you thought they would. For instance, the
+person's name in the example we just used might have a title or
+suffix attached, such as @samp{John Q. Smith, LXIX}. From input
+containing such a name:
+
+@example
+John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139
+@end example
+
+@noindent
+@c careful of an overfull hbox here!
+the above program would extract @samp{@bullet{}LXIX}, instead of
+@samp{@bullet{}29@bullet{}Oak@bullet{}St.}.
+If you were expecting the program to print the
+address, you would be surprised. The moral is: choose your data layout and
+separator characters carefully to prevent such problems.
+
+@iftex
+As you know, normally,
+@end iftex
+@ifinfo
+Normally,
+@end ifinfo
+fields are separated by whitespace sequences
+(spaces, tabs and newlines), not by single spaces: two spaces in a row do not
+delimit an empty field. The default value of the field separator @code{FS}
+is a string containing a single space, @w{@code{" "}}. If this value were
+interpreted in the usual way, each space character would separate
+fields, so two spaces in a row would make an empty field between them.
+The reason this does not happen is that a single space as the value of
+@code{FS} is a special case: it is taken to specify the default manner
+of delimiting fields.
+
+If @code{FS} is any other single character, such as @code{","}, then
+each occurrence of that character separates two fields. Two consecutive
+occurrences delimit an empty field. If the character occurs at the
+beginning or the end of the line, that too delimits an empty field. The
+space character is the only single character which does not follow these
+rules.
+
+@node Regexp Field Splitting, Single Character Fields, Basic Field Splitting, Field Separators
+@subsection Using Regular Expressions to Separate Fields
+
+The previous
+@iftex
+subsection
+@end iftex
+@ifinfo
+node
+@end ifinfo
+discussed the use of single characters or simple strings as the
+value of @code{FS}.
+More generally, the value of @code{FS} may be a string containing any
+regular expression. In this case, each match in the record for the regular
+expression separates fields. For example, the assignment:
+
+@example
+FS = ", \t"
+@end example
+
+@noindent
+makes every area of an input line that consists of a comma followed by a
+space and a tab into a field separator. (@samp{\t}
+is an @dfn{escape sequence} that stands for a tab;
+@pxref{Escape Sequences},
+for the complete list of similar escape sequences.)
+
+For a less trivial example of a regular expression, suppose you want
+single spaces to separate fields the way single commas were used above.
+You can set @code{FS} to @w{@code{"[@ ]"}} (left bracket, space, right
+bracket). This regular expression matches a single space and nothing else
+(@pxref{Regexp, ,Regular Expressions}).
+
+There is an important difference between the two cases of @samp{FS = @w{" "}}
+(a single space) and @samp{FS = @w{"[ \t\n]+"}} (left bracket, space,
+backslash, ``t'', backslash, ``n'', right bracket, plus sign, which is a
+regular expression matching one or more spaces, tabs, or newlines). For both
+values of @code{FS}, fields are separated by runs of spaces, tabs
+and/or newlines. However, when the value of @code{FS} is
+@w{@code{" "}}, @code{awk} will first strip leading and trailing whitespace from
+the record, and then decide where the fields are.
+
+For example, the following pipeline prints @samp{b}:
+
+@example
+$ echo ' a b c d ' | awk '@{ print $2 @}'
+@print{} b
+@end example
+
+@noindent
+However, this pipeline prints @samp{a} (note the extra spaces around
+each letter):
+
+@example
+$ echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t]+" @}
+> @{ print $2 @}'
+@print{} a
+@end example
+
+@noindent
+@cindex null string
+@cindex empty string
+In this case, the first field is @dfn{null}, or empty.
+
+The stripping of leading and trailing whitespace also comes into
+play whenever @code{$0} is recomputed. For instance, study this pipeline:
+
+@example
+$ echo ' a b c d' | awk '@{ print; $2 = $2; print @}'
+@print{} a b c d
+@print{} a b c d
+@end example
+
+@noindent
+The first @code{print} statement prints the record as it was read,
+with leading whitespace intact. The assignment to @code{$2} rebuilds
+@code{$0} by concatenating @code{$1} through @code{$NF} together,
+separated by the value of @code{OFS}. Since the leading whitespace
+was ignored when finding @code{$1}, it is not part of the new @code{$0}.
+Finally, the last @code{print} statement prints the new @code{$0}.
+
+@node Single Character Fields, Command Line Field Separator, Regexp Field Splitting, Field Separators
+@subsection Making Each Character a Separate Field
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex single character fields
+There are times when you may want to examine each character
+of a record separately. In @code{gawk}, this is easy to do: you
+simply assign the null string (@code{""}) to @code{FS}. In this case,
+each individual character in the record will become a separate field.
+Here is an example:
+
+@example
+@group
+$ echo a b | gawk 'BEGIN @{ FS = "" @}
+> @{
+> for (i = 1; i <= NF; i = i + 1)
+> print "Field", i, "is", $i
+> @}'
+@print{} Field 1 is a
+@print{} Field 2 is
+@print{} Field 3 is b
+@end group
+@end example
+
+@cindex dark corner
+Traditionally, the behavior for @code{FS} equal to @code{""} was not defined.
+In this case, Unix @code{awk} would simply treat the entire record
+as only having one field (d.c.). In compatibility mode
+(@pxref{Options, ,Command Line Options}),
+if @code{FS} is the null string, then @code{gawk} will also
+behave this way.
+
+@node Command Line Field Separator, Field Splitting Summary, Single Character Fields, Field Separators
+@subsection Setting @code{FS} from the Command Line
+@cindex @code{-F} option
+@cindex field separator, on command line
+@cindex command line, setting @code{FS} on
+
+@code{FS} can be set on the command line. You use the @samp{-F} option to
+do so. For example:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+@noindent
+sets @code{FS} to be the @samp{,} character. Notice that the option uses
+a capital @samp{F}. Contrast this with @samp{-f}, which specifies a file
+containing an @code{awk} program. Case is significant in command line options:
+the @samp{-F} and @samp{-f} options have nothing to do with each other.
+You can use both options at the same time to set the @code{FS} variable
+@emph{and} get an @code{awk} program from a file.
+
+The value used for the argument to @samp{-F} is processed in exactly the
+same way as assignments to the built-in variable @code{FS}. This means that
+if the field separator contains special characters, they must be escaped
+appropriately. For example, to use a @samp{\} as the field separator, you
+would have to type:
+
+@example
+# same as FS = "\\"
+awk -F\\\\ '@dots{}' files @dots{}
+@end example
+
+@noindent
+Since @samp{\} is used for quoting in the shell, @code{awk} will see
+@samp{-F\\}. Then @code{awk} processes the @samp{\\} for escape
+characters (@pxref{Escape Sequences}), finally yielding
+a single @samp{\} to be used for the field separator.
+
+@cindex historical features
+As a special case, in compatibility mode
+(@pxref{Options, ,Command Line Options}), if the
+argument to @samp{-F} is @samp{t}, then @code{FS} is set to the tab
+character. This is because if you type @samp{-F\t} at the shell,
+without any quotes, the @samp{\} gets deleted, so @code{awk} figures that you
+really want your fields to be separated with tabs, and not @samp{t}s.
+Use @samp{-v FS="t"} on the command line if you really do want to separate
+your fields with @samp{t}s
+(@pxref{Options, ,Command Line Options}).
+
+For example, let's use an @code{awk} program file called @file{baud.awk}
+that contains the pattern @code{/300/}, and the action @samp{print $1}.
+Here is the program:
+
+@example
+/300/ @{ print $1 @}
+@end example
+
+Let's also set @code{FS} to be the @samp{-} character, and run the
+program on the file @file{BBS-list}. The following command prints a
+list of the names of the bulletin boards that operate at 300 baud and
+the first three digits of their phone numbers:
+
+@c tweaked to make the tex output look better in @smallbook
+@example
+@group
+$ awk -F- -f baud.awk BBS-list
+@print{} aardvark 555
+@print{} alpo
+@print{} barfly 555
+@dots{}
+@end group
+@ignore
+@print{} bites 555
+@print{} camelot 555
+@print{} core 555
+@print{} fooey 555
+@print{} foot 555
+@print{} macfoo 555
+@print{} sdace 555
+@print{} sabafoo 555
+@end ignore
+@end example
+
+@noindent
+Note the second line of output. In the original file
+(@pxref{Sample Data Files, ,Data Files for the Examples}),
+the second line looked like this:
+
+@example
+alpo-net 555-3412 2400/1200/300 A
+@end example
+
+The @samp{-} as part of the system's name was used as the field
+separator, instead of the @samp{-} in the phone number that was
+originally intended. This demonstrates why you have to be careful in
+choosing your field and record separators.
+
+On many Unix systems, each user has a separate entry in the system password
+file, one line per user. The information in these lines is separated
+by colons. The first field is the user's logon name, and the second is
+the user's encrypted password. A password file entry might look like this:
+
+@example
+arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh
+@end example
+
+The following program searches the system password file, and prints
+the entries for users who have no password:
+
+@example
+awk -F: '$2 == ""' /etc/passwd
+@end example
+
+@node Field Splitting Summary, , Command Line Field Separator, Field Separators
+@subsection Field Splitting Summary
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+According to the POSIX standard, @code{awk} is supposed to behave
+as if each record is split into fields at the time that it is read.
+In particular, this means that you can change the value of @code{FS}
+after a record is read, and the value of the fields (i.e.@: how they were split)
+should reflect the old value of @code{FS}, not the new one.
+
+@cindex dark corner
+@cindex @code{sed} utility
+@cindex stream editor
+However, many implementations of @code{awk} do not work this way. Instead,
+they defer splitting the fields until a field is actually
+referenced. The fields will be split
+using the @emph{current} value of @code{FS}! (d.c.)
+This behavior can be difficult
+to diagnose. The following example illustrates the difference
+between the two methods.
+(The @code{sed}@footnote{The @code{sed} utility is a ``stream editor.''
+Its behavior is also defined by the POSIX standard.}
+command prints just the first line of @file{/etc/passwd}.)
+
+@example
+sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}'
+@end example
+
+@noindent
+will usually print
+
+@example
+root
+@end example
+
+@noindent
+on an incorrect implementation of @code{awk}, while @code{gawk}
+will print something like
+
+@example
+root:nSijPlPhZZwgE:0:0:Root:/:
+@end example
+
+The following table summarizes how fields are split, based on the
+value of @code{FS}. (@samp{==} means ``is equal to.'')
+
+@c @cartouche
+@table @code
+@item FS == " "
+Fields are separated by runs of whitespace. Leading and trailing
+whitespace are ignored. This is the default.
+
+@item FS == @var{any other single character}
+Fields are separated by each occurrence of the character. Multiple
+successive occurrences delimit empty fields, as do leading and
+trailing occurrences.
+The character can even be a regexp metacharacter; it does not need
+to be escaped.
+
+@item FS == @var{regexp}
+Fields are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty fields.
+
+@item FS == ""
+Each individual character in the record becomes a separate field.
+@end table
+@c @end cartouche
+
+@node Constant Size, Multiple Line, Field Separators, Reading Files
+@section Reading Fixed-width Data
+
+(This section discusses an advanced, experimental feature. If you are
+a novice @code{awk} user, you may wish to skip it on the first reading.)
+
+@code{gawk} version 2.13 introduced a new facility for dealing with
+fixed-width fields with no distinctive field separator. Data of this
+nature arises, for example, in the input for old FORTRAN programs where
+numbers are run together; or in the output of programs that did not
+anticipate the use of their output as input for other programs.
+
+An example of the latter is a table where all the columns are lined up by
+the use of a variable number of spaces and @emph{empty fields are just
+spaces}. Clearly, @code{awk}'s normal field splitting based on @code{FS}
+will not work well in this case. Although a portable @code{awk} program
+can use a series of @code{substr} calls on @code{$0}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+this is awkward and inefficient for a large number of fields.
+
+The splitting of an input record into fixed-width fields is specified by
+assigning a string containing space-separated numbers to the built-in
+variable @code{FIELDWIDTHS}. Each number specifies the width of the field
+@emph{including} columns between fields. If you want to ignore the columns
+between fields, you can specify the width as a separate field that is
+subsequently ignored.
+
+The following data is the output of the Unix @code{w} utility. It is useful
+to illustrate the use of @code{FIELDWIDTHS}.
+
+@example
+@group
+ 10:06pm up 21 days, 14:04, 23 users
+User tty login@ idle JCPU PCPU what
+hzuo ttyV0 8:58pm 9 5 vi p24.tex
+hzang ttyV3 6:37pm 50 -csh
+eklye ttyV5 9:53pm 7 1 em thes.tex
+dportein ttyV6 8:17pm 1:47 -csh
+gierd ttyD3 10:00pm 1 elm
+dave ttyD4 9:47pm 4 4 w
+brent ttyp0 26Jun91 4:46 26:46 4:41 bash
+dave ttyq4 26Jun9115days 46 46 wnewmail
+@end group
+@end example
+
+The following program takes the above input, converts the idle time to
+number of seconds and prints out the first two fields and the calculated
+idle time. (This program uses a number of @code{awk} features that
+haven't been introduced yet.)
+
+@example
+@group
+BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
+NR > 2 @{
+ idle = $4
+ sub(/^ */, "", idle) # strip leading spaces
+ if (idle == "")
+ idle = 0
+ if (idle ~ /:/) @{
+ split(idle, t, ":")
+ idle = t[1] * 60 + t[2]
+ @}
+ if (idle ~ /days/)
+ idle *= 24 * 60 * 60
+
+ print $1, $2, idle
+@}
+@end group
+@end example
+
+Here is the result of running the program on the data:
+
+@example
+hzuo ttyV0 0
+hzang ttyV3 50
+eklye ttyV5 0
+dportein ttyV6 107
+gierd ttyD3 1
+dave ttyD4 0
+brent ttyp0 286
+dave ttyq4 1296000
+@end example
+
+Another (possibly more practical) example of fixed-width input data
+would be the input from a deck of balloting cards. In some parts of
+the United States, voters mark their choices by punching holes in computer
+cards. These cards are then processed to count the votes for any particular
+candidate or on any particular issue. Since a voter may choose not to
+vote on some issue, any column on the card may be empty. An @code{awk}
+program for processing such data could use the @code{FIELDWIDTHS} feature
+to simplify reading the data. (Of course, getting @code{gawk} to run on
+a system with card readers is another story!)
+
+@ignore
+Exercise: Write a ballot card reading program
+@end ignore
+
+Assigning a value to @code{FS} causes @code{gawk} to return to using
+@code{FS} for field splitting. Use @samp{FS = FS} to make this happen,
+without having to know the current value of @code{FS}.
+
+This feature is still experimental, and may evolve over time.
+Note that in particular, @code{gawk} does not attempt to verify
+the sanity of the numbers used in the value of @code{FIELDWIDTHS}.
+
+@node Multiple Line, Getline, Constant Size, Reading Files
+@section Multiple-Line Records
+
+@cindex multiple line records
+@cindex input, multiple line records
+@cindex reading files, multiple line records
+@cindex records, multiple line
+In some databases, a single line cannot conveniently hold all the
+information in one entry. In such cases, you can use multi-line
+records.
+
+The first step in doing this is to choose your data format: when records
+are not defined as single lines, how do you want to define them?
+What should separate records?
+
+One technique is to use an unusual character or string to separate
+records. For example, you could use the formfeed character (written
+@samp{\f} in @code{awk}, as in C) to separate them, making each record
+a page of the file. To do this, just set the variable @code{RS} to
+@code{"\f"} (a string containing the formfeed character). Any
+other character could equally well be used, as long as it won't be part
+of the data in a record.
+
+Another technique is to have blank lines separate records. By a special
+dispensation, an empty string as the value of @code{RS} indicates that
+records are separated by one or more blank lines. If you set @code{RS}
+to the empty string, a record always ends at the first blank line
+encountered. And the next record doesn't start until the first non-blank
+line that follows---no matter how many blank lines appear in a row, they
+are considered one record-separator.
+
+@cindex leftmost longest match
+@cindex matching, leftmost longest
+You can achieve the same effect as @samp{RS = ""} by assigning the
+string @code{"\n\n+"} to @code{RS}. This regexp matches the newline
+at the end of the record, and one or more blank lines after the record.
+In addition, a regular expression always matches the longest possible
+sequence when there is a choice
+(@pxref{Leftmost Longest, ,How Much Text Matches?}).
+So the next record doesn't start until
+the first non-blank line that follows---no matter how many blank lines
+appear in a row, they are considered one record-separator.
+
+@cindex dark corner
+There is an important difference between @samp{RS = ""} and
+@samp{RS = "\n\n+"}. In the first case, leading newlines in the input
+data file are ignored, and if a file ends without extra blank lines
+after the last record, the final newline is removed from the record.
+In the second case, this special processing is not done (d.c.).
+
+Now that the input is separated into records, the second step is to
+separate the fields in the record. One way to do this is to divide each
+of the lines into fields in the normal manner. This happens by default
+as the result of a special feature: when @code{RS} is set to the empty
+string, the newline character @emph{always} acts as a field separator.
+This is in addition to whatever field separations result from @code{FS}.
+
+The original motivation for this special exception was probably to provide
+useful behavior in the default case (i.e.@: @code{FS} is equal
+to @w{@code{" "}}). This feature can be a problem if you really don't
+want the newline character to separate fields, since there is no way to
+prevent it. However, you can work around this by using the @code{split}
+function to break up the record manually
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+Another way to separate fields is to
+put each field on a separate line: to do this, just set the
+variable @code{FS} to the string @code{"\n"}. (This simple regular
+expression matches a single newline.)
+
+A practical example of a data file organized this way might be a mailing
+list, where each entry is separated by blank lines. If we have a mailing
+list in a file named @file{addresses}, that looks like this:
+
+@example
+Jane Doe
+123 Main Street
+Anywhere, SE 12345-6789
+
+John Smith
+456 Tree-lined Avenue
+Smallville, MW 98765-4321
+
+@dots{}
+@end example
+
+@noindent
+A simple program to process this file would look like this:
+
+@example
+@group
+# addrs.awk --- simple mailing list program
+
+# Records are separated by blank lines.
+# Each line is one field.
+BEGIN @{ RS = "" ; FS = "\n" @}
+
+@{
+ print "Name is:", $1
+ print "Address is:", $2
+ print "City and State are:", $3
+ print ""
+@}
+@end group
+@end example
+
+Running the program produces the following output:
+
+@example
+@group
+$ awk -f addrs.awk addresses
+@print{} Name is: Jane Doe
+@print{} Address is: 123 Main Street
+@print{} City and State are: Anywhere, SE 12345-6789
+@print{}
+@end group
+@group
+@print{} Name is: John Smith
+@print{} Address is: 456 Tree-lined Avenue
+@print{} City and State are: Smallville, MW 98765-4321
+@print{}
+@dots{}
+@end group
+@end example
+
+@xref{Labels Program, ,Printing Mailing Labels}, for a more realistic
+program that deals with address lists.
+
+The following table summarizes how records are split, based on the
+value of @code{RS}. (@samp{==} means ``is equal to.'')
+
+@c @cartouche
+@table @code
+@item RS == "\n"
+Records are separated by the newline character (@samp{\n}). In effect,
+every line in the data file is a separate record, including blank lines.
+This is the default.
+
+@item RS == @var{any single character}
+Records are separated by each occurrence of the character. Multiple
+successive occurrences delimit empty records.
+
+@item RS == ""
+Records are separated by runs of blank lines. The newline character
+always serves as a field separator, in addition to whatever value
+@code{FS} may have. Leading and trailing newlines in a file are ignored.
+
+@item RS == @var{regexp}
+Records are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty records.
+@end table
+@c @end cartouche
+
+@vindex RT
+In all cases, @code{gawk} sets @code{RT} to the input text that matched the
+value specified by @code{RS}.
+
+@node Getline, , Multiple Line, Reading Files
+@section Explicit Input with @code{getline}
+
+@findex getline
+@cindex input, explicit
+@cindex explicit input
+@cindex input, @code{getline} command
+@cindex reading files, @code{getline} command
+So far we have been getting our input data from @code{awk}'s main
+input stream---either the standard input (usually your terminal, sometimes
+the output from another program) or from the
+files specified on the command line. The @code{awk} language has a
+special built-in command called @code{getline} that
+can be used to read input under your explicit control.
+
+@menu
+* Getline Intro:: Introduction to the @code{getline} function.
+* Plain Getline:: Using @code{getline} with no arguments.
+* Getline/Variable:: Using @code{getline} into a variable.
+* Getline/File:: Using @code{getline} from a file.
+* Getline/Variable/File:: Using @code{getline} into a variable from a
+ file.
+* Getline/Pipe:: Using @code{getline} from a pipe.
+* Getline/Variable/Pipe:: Using @code{getline} into a variable from a
+ pipe.
+* Getline Summary:: Summary Of @code{getline} Variants.
+@end menu
+
+@node Getline Intro, Plain Getline, Getline, Getline
+@subsection Introduction to @code{getline}
+
+This command is used in several different ways, and should @emph{not} be
+used by beginners. It is covered here because this is the chapter on input.
+The examples that follow the explanation of the @code{getline} command
+include material that has not been covered yet. Therefore, come back
+and study the @code{getline} command @emph{after} you have reviewed the
+rest of this @value{DOCUMENT} and have a good knowledge of how @code{awk} works.
+
+@vindex ERRNO
+@cindex differences between @code{gawk} and @code{awk}
+@cindex @code{getline}, return values
+@code{getline} returns one if it finds a record, and zero if the end of the
+file is encountered. If there is some error in getting a record, such
+as a file that cannot be opened, then @code{getline} returns @minus{}1.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+In the following examples, @var{command} stands for a string value that
+represents a shell command.
+
+@node Plain Getline, Getline/Variable, Getline Intro, Getline
+@subsection Using @code{getline} with No Arguments
+
+The @code{getline} command can be used without arguments to read input
+from the current input file. All it does in this case is read the next
+input record and split it up into fields. This is useful if you've
+finished processing the current record, but you want to do some special
+processing @emph{right now} on the next record. Here's an
+example:
+
+@example
+@group
+awk '@{
+ if ((t = index($0, "/*")) != 0) @{
+ # value will be "" if t is 1
+ tmp = substr($0, 1, t - 1)
+ u = index(substr($0, t + 2), "*/")
+ while (u == 0) @{
+ if (getline <= 0) @{
+ m = "unexpected EOF or error"
+ m = (m ": " ERRNO)
+ print m > "/dev/stderr"
+ exit
+ @}
+ t = -1
+ u = index($0, "*/")
+ @}
+@end group
+@group
+ # substr expression will be "" if */
+ # occurred at end of line
+ $0 = tmp substr($0, t + u + 3)
+ @}
+ print $0
+@}'
+@end group
+@end example
+
+This @code{awk} program deletes all C-style comments, @samp{/* @dots{}
+*/}, from the input. By replacing the @samp{print $0} with other
+statements, you could perform more complicated processing on the
+decommented input, like searching for matches of a regular
+expression. This program has a subtle problem---it does not work if one
+comment ends and another begins on the same line.
+
+@ignore
+Exercise,
+write a program that does handle multiple comments on the line.
+@end ignore
+
+This form of the @code{getline} command sets @code{NF} (the number of
+fields; @pxref{Fields, ,Examining Fields}), @code{NR} (the number of
+records read so far; @pxref{Records, ,How Input is Split into Records}),
+@code{FNR} (the number of records read from this input file), and the
+value of @code{$0}.
+
+@cindex dark corner
+@strong{Note:} the new value of @code{$0} is used in testing
+the patterns of any subsequent rules. The original value
+of @code{$0} that triggered the rule which executed @code{getline}
+is lost (d.c.).
+By contrast, the @code{next} statement reads a new record
+but immediately begins processing it normally, starting with the first
+rule in the program. @xref{Next Statement, ,The @code{next} Statement}.
+
+@node Getline/Variable, Getline/File, Plain Getline, Getline
+@subsection Using @code{getline} Into a Variable
+
+You can use @samp{getline @var{var}} to read the next record from
+@code{awk}'s input into the variable @var{var}. No other processing is
+done.
+
+For example, suppose the next line is a comment, or a special string,
+and you want to read it, without triggering
+any rules. This form of @code{getline} allows you to read that line
+and store it in a variable so that the main
+read-a-line-and-check-each-rule loop of @code{awk} never sees it.
+
+The following example swaps every two lines of input. For example, given:
+
+@example
+wan
+tew
+free
+phore
+@end example
+
+@noindent
+it outputs:
+
+@example
+tew
+wan
+phore
+free
+@end example
+
+@noindent
+Here's the program:
+
+@example
+@group
+awk '@{
+ if ((getline tmp) > 0) @{
+ print tmp
+ print $0
+ @} else
+ print $0
+@}'
+@end group
+@end example
+
+The @code{getline} command used in this way sets only the variables
+@code{NR} and @code{FNR} (and of course, @var{var}). The record is not
+split into fields, so the values of the fields (including @code{$0}) and
+the value of @code{NF} do not change.
+
+@node Getline/File, Getline/Variable/File, Getline/Variable, Getline
+@subsection Using @code{getline} from a File
+
+@cindex input redirection
+@cindex redirection of input
+Use @samp{getline < @var{file}} to read
+the next record from the file
+@var{file}. Here @var{file} is a string-valued expression that
+specifies the file name. @samp{< @var{file}} is called a @dfn{redirection}
+since it directs input to come from a different place.
+
+For example, the following
+program reads its input record from the file @file{secondary.input} when it
+encounters a first field with a value equal to 10 in the current input
+file.
+
+@example
+@group
+awk '@{
+ if ($1 == 10) @{
+ getline < "secondary.input"
+ print
+ @} else
+ print
+@}'
+@end group
+@end example
+
+Since the main input stream is not used, the values of @code{NR} and
+@code{FNR} are not changed. But the record read is split into fields in
+the normal manner, so the values of @code{$0} and other fields are
+changed. So is the value of @code{NF}.
+
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{getline < @var{expression}} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{getline < dir "/" file} is ambiguous
+because the concatenation operator is not parenthesized, and you should
+write it as @samp{getline < (dir "/" file)} if you want your program
+to be portable to other @code{awk} implementations.
+
+@node Getline/Variable/File, Getline/Pipe, Getline/File, Getline
+@subsection Using @code{getline} Into a Variable from a File
+
+Use @samp{getline @var{var} < @var{file}} to read input from
+the file
+@var{file} and put it in the variable @var{var}. As above, @var{file}
+is a string-valued expression that specifies the file from which to read.
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields. The only variable
+changed is @var{var}.
+
+@ifinfo
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{getline @var{var} < @var{expression}} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{getline @var{var} < dir "/" file} is ambiguous
+because the concatenation operator is not parenthesized, and you should
+write it as @samp{getline @var{var} < (dir "/" file)} if you want your program
+to be portable to other @code{awk} implementations.
+@end ifinfo
+
+For example, the following program copies all the input files to the
+output, except for records that say @w{@samp{@@include @var{filename}}}.
+Such a record is replaced by the contents of the file
+@var{filename}.
+
+@example
+@group
+awk '@{
+ if (NF == 2 && $1 == "@@include") @{
+ while ((getline line < $2) > 0)
+ print line
+ close($2)
+ @} else
+ print
+@}'
+@end group
+@end example
+
+Note here how the name of the extra input file is not built into
+the program; it is taken directly from the data, from the second field on
+the @samp{@@include} line.
+
+The @code{close} function is called to ensure that if two identical
+@samp{@@include} lines appear in the input, the entire specified file is
+included twice.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}.
+
+One deficiency of this program is that it does not process nested
+@samp{@@include} statements
+(@samp{@@include} statements in included files)
+the way a true macro preprocessor would.
+@xref{Igawk Program, ,An Easy Way to Use Library Functions}, for a program
+that does handle nested @samp{@@include} statements.
+
+@node Getline/Pipe, Getline/Variable/Pipe, Getline/Variable/File, Getline
+@subsection Using @code{getline} from a Pipe
+
+@cindex input pipeline
+@cindex pipeline, input
+You can pipe the output of a command into @code{getline}, using
+@samp{@var{command} | getline}. In
+this case, the string @var{command} is run as a shell command and its output
+is piped into @code{awk} to be used as input. This form of @code{getline}
+reads one record at a time from the pipe.
+
+For example, the following program copies its input to its output, except for
+lines that begin with @samp{@@execute}, which are replaced by the output
+produced by running the rest of the line as a shell command:
+
+@example
+@group
+awk '@{
+ if ($1 == "@@execute") @{
+ tmp = substr($0, 10)
+ while ((tmp | getline) > 0)
+ print
+ close(tmp)
+ @} else
+ print
+@}'
+@end group
+@end example
+
+@noindent
+The @code{close} function is called to ensure that if two identical
+@samp{@@execute} lines appear in the input, the command is run for
+each one.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}.
+@c Exercise!!
+@c This example is unrealistic, since you could just use system
+
+@c NEEDED
+@page
+Given the input:
+
+@example
+@group
+foo
+bar
+baz
+@@execute who
+bletch
+@end group
+@end example
+
+@noindent
+the program might produce:
+
+@example
+@group
+foo
+bar
+baz
+arnold ttyv0 Jul 13 14:22
+miriam ttyp0 Jul 13 14:23 (murphy:0)
+bill ttyp1 Jul 13 14:23 (murphy:0)
+bletch
+@end group
+@end example
+
+@noindent
+Notice that this program ran the command @code{who} and printed the result.
+(If you try this program yourself, you will of course get different results,
+showing you who is logged in on your system.)
+
+This variation of @code{getline} splits the record into fields, sets the
+value of @code{NF} and recomputes the value of @code{$0}. The values of
+@code{NR} and @code{FNR} are not changed.
+
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{@var{expression} | getline} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{"echo " "date" | getline} is ambiguous
+because the concatenation operator is not parenthesized, and you should
+write it as @samp{("echo " "date") | getline} if you want your program
+to be portable to other @code{awk} implementations.
+
+@node Getline/Variable/Pipe, Getline Summary, Getline/Pipe, Getline
+@subsection Using @code{getline} Into a Variable from a Pipe
+
+When you use @samp{@var{command} | getline @var{var}}, the
+output of the command @var{command} is sent through a pipe to
+@code{getline} and into the variable @var{var}. For example, the
+following program reads the current date and time into the variable
+@code{current_time}, using the @code{date} utility, and then
+prints it.
+
+@example
+@group
+awk 'BEGIN @{
+ "date" | getline current_time
+ close("date")
+ print "Report printed on " current_time
+@}'
+@end group
+@end example
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields.
+
+@ifinfo
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{@var{expression} | getline @var{var}} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{"echo " "date" | getline @var{var}} is ambiguous
+because the concatenation operator is not parenthesized, and you should
+write it as @samp{("echo " "date") | getline @var{var}} if you want your
+program to be portable to other @code{awk} implementations.
+@end ifinfo
+
+@node Getline Summary, , Getline/Variable/Pipe, Getline
+@subsection Summary of @code{getline} Variants
+
+With all the forms of @code{getline}, even though @code{$0} and @code{NF}
+may be updated, the record will not be tested against all the patterns
+in the @code{awk} program, in the way that would happen if the record
+were read normally by the main processing loop of @code{awk}. However,
+the new record is tested against any subsequent rules.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex limitations
+@cindex implementation limits
+Many @code{awk} implementations limit the number of pipelines an @code{awk}
+program may have open to just one! In @code{gawk}, there is no such limit.
+You can open as many pipelines as the underlying operating system will
+permit.
+
+@vindex FILENAME
+@cindex dark corner
+@cindex @code{getline}, setting @code{FILENAME}
+@cindex @code{FILENAME}, being set by @code{getline}
+An interesting side-effect occurs if you use @code{getline} (without a
+redirection) inside a @code{BEGIN} rule. Since an unredirected @code{getline}
+reads from the command line data files, the first @code{getline} command
+causes @code{awk} to set the value of @code{FILENAME}. Normally,
+@code{FILENAME} does not have a value inside @code{BEGIN} rules, since you
+have not yet started to process the command line data files (d.c.).
+(@xref{BEGIN/END, , The @code{BEGIN} and @code{END} Special Patterns},
+also @pxref{Auto-set, , Built-in Variables that Convey Information}.)
+
+The following table summarizes the six variants of @code{getline},
+listing which built-in variables are set by each one.
+
+@c @cartouche
+@table @code
+@item getline
+sets @code{$0}, @code{NF}, @code{FNR}, and @code{NR}.
+
+@item getline @var{var}
+sets @var{var}, @code{FNR}, and @code{NR}.
+
+@item getline < @var{file}
+sets @code{$0} and @code{NF}.
+
+@item getline @var{var} < @var{file}
+sets @var{var}.
+
+@item @var{command} | getline
+sets @code{$0} and @code{NF}.
+
+@item @var{command} | getline @var{var}
+sets @var{var}.
+@end table
+@c @end cartouche
+
+@node Printing, Expressions, Reading Files, Top
+@chapter Printing Output
+
+@cindex printing
+@cindex output
+One of the most common actions is to @dfn{print}, or output,
+some or all of the input. You use the @code{print} statement
+for simple output. You use the @code{printf} statement
+for fancier formatting. Both are described in this chapter.
+
+@menu
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print} statements.
+* Output Separators:: The output separators and how to change them.
+* OFMT:: Controlling Numeric Output With @code{print}.
+* Printf:: The @code{printf} statement.
+* Redirection:: How to redirect output to multiple files and
+ pipes.
+* Special Files:: File name interpretation in @code{gawk}.
+ @code{gawk} allows access to inherited file
+ descriptors.
+* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+@end menu
+
+@node Print, Print Examples, Printing, Printing
+@section The @code{print} Statement
+@cindex @code{print} statement
+
+The @code{print} statement does output with simple, standardized
+formatting. You specify only the strings or numbers to be printed, in a
+list separated by commas. They are output, separated by single spaces,
+followed by a newline. The statement looks like this:
+
+@example
+print @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of items may optionally be enclosed in parentheses. The
+parentheses are necessary if any of the item expressions uses the @samp{>}
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+
+The items to be printed can be constant strings or numbers, fields of the
+current record (such as @code{$1}), variables, or any @code{awk}
+expressions.
+Numeric values are converted to strings, and then printed.
+
+The @code{print} statement is completely general for
+computing @emph{what} values to print. However, with two exceptions,
+you cannot specify @emph{how} to print them---how many
+columns, whether to use exponential notation or not, and so on.
+(For the exceptions, @pxref{Output Separators}, and
+@ref{OFMT, ,Controlling Numeric Output with @code{print}}.)
+For that, you need the @code{printf} statement
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+
+The simple statement @samp{print} with no items is equivalent to
+@samp{print $0}: it prints the entire current record. To print a blank
+line, use @samp{print ""}, where @code{""} is the empty string.
+
+To print a fixed piece of text, use a string constant such as
+@w{@code{"Don't Panic"}} as one item. If you forget to use the
+double-quote characters, your text will be taken as an @code{awk}
+expression, and you will probably get an error. Keep in mind that a
+space is printed between any two items.
+
+Each @code{print} statement makes at least one line of output. But it
+isn't limited to one line. If an item value is a string that contains a
+newline, the newline is output along with the rest of the string. A
+single @code{print} can make any number of lines this way.
+
+@node Print Examples, Output Separators, Print, Printing
+@section Examples of @code{print} Statements
+
+Here is an example of printing a string that contains embedded newlines
+(the @samp{\n} is an escape sequence, used to represent the newline
+character; see @ref{Escape Sequences}):
+
+@example
+@group
+$ awk 'BEGIN @{ print "line one\nline two\nline three" @}'
+@print{} line one
+@print{} line two
+@print{} line three
+@end group
+@end example
+
+Here is an example that prints the first two fields of each input record,
+with a space between them:
+
+@example
+@group
+$ awk '@{ print $1, $2 @}' inventory-shipped
+@print{} Jan 13
+@print{} Feb 15
+@print{} Mar 15
+@dots{}
+@end group
+@end example
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+A common mistake in using the @code{print} statement is to omit the comma
+between two items. This often has the effect of making the items run
+together in the output, with no space. The reason for this is that
+juxtaposing two string expressions in @code{awk} means to concatenate
+them. Here is the same program, without the comma:
+
+@example
+@group
+$ awk '@{ print $1 $2 @}' inventory-shipped
+@print{} Jan13
+@print{} Feb15
+@print{} Mar15
+@dots{}
+@end group
+@end example
+
+To someone unfamiliar with the file @file{inventory-shipped}, neither
+example's output makes much sense. A heading line at the beginning
+would make it clearer. Let's add some headings to our table of months
+(@code{$1}) and green crates shipped (@code{$2}). We do this using the
+@code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns})
+to force the headings to be printed only once:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, $2 @}' inventory-shipped
+@end example
+
+@noindent
+Did you already guess what happens? When run, the program prints
+the following:
+
+@example
+@group
+Month Crates
+----- ------
+Jan 13
+Feb 15
+Mar 15
+@dots{}
+@end group
+@end example
+
+@noindent
+The headings and the table data don't line up! We can fix this by printing
+some spaces between the two fields:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+
+You can imagine that this way of lining up columns can get pretty
+complicated when you have many columns to fix. Counting spaces for two
+or three columns can be simple, but more than this and you can get
+lost quite easily. This is why the @code{printf} statement was
+created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing});
+one of its specialties is lining up columns of data.
+
+@cindex line continuation
+As a side point,
+you can continue either a @code{print} or @code{printf} statement simply
+by putting a newline after any comma
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
+
+@node Output Separators, OFMT, Print Examples, Printing
+@section Output Separators
+
+@cindex output field separator, @code{OFS}
+@cindex output record separator, @code{ORS}
+@vindex OFS
+@vindex ORS
+As mentioned previously, a @code{print} statement contains a list
+of items, separated by commas. In the output, the items are normally
+separated by single spaces. This need not be the case; a
+single space is only the default. You can specify any string of
+characters to use as the @dfn{output field separator} by setting the
+built-in variable @code{OFS}. The initial value of this variable
+is the string @w{@code{" "}}, that is, a single space.
+
+The output from an entire @code{print} statement is called an
+@dfn{output record}. Each @code{print} statement outputs one output
+record and then outputs a string called the @dfn{output record separator}.
+The built-in variable @code{ORS} specifies this string. The initial
+value of @code{ORS} is the string @code{"\n"}, i.e.@: a newline
+character; thus, normally each @code{print} statement makes a separate line.
+
+You can change how output fields and records are separated by assigning
+new values to the variables @code{OFS} and/or @code{ORS}. The usual
+place to do this is in the @code{BEGIN} rule
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}), so
+that it happens before any input is processed. You may also do this
+with assignments on the command line, before the names of your input
+files, or using the @samp{-v} command line option
+(@pxref{Options, ,Command Line Options}).
+
+@ignore
+Exercise,
+Rewrite the
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+program by using a new value of @code{OFS}.
+@end ignore
+
+The following example prints the first and second fields of each input
+record separated by a semicolon, with a blank line added after each
+line:
+
+@example
+@group
+$ awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}
+> @{ print $1, $2 @}' BBS-list
+@print{} aardvark;555-5553
+@print{}
+@print{} alpo-net;555-3412
+@print{}
+@print{} barfly;555-7685
+@dots{}
+@end group
+@end example
+
+If the value of @code{ORS} does not contain a newline, all your output
+will be run together on a single line, unless you output newlines some
+other way.
+
+@node OFMT, Printf, Output Separators, Printing
+@section Controlling Numeric Output with @code{print}
+@vindex OFMT
+@cindex numeric output format
+@cindex format, numeric output
+@cindex output format specifier, @code{OFMT}
+When you use the @code{print} statement to print numeric values,
+@code{awk} internally converts the number to a string of characters,
+and prints that string. @code{awk} uses the @code{sprintf} function
+to do this conversion
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+For now, it suffices to say that the @code{sprintf}
+function accepts a @dfn{format specification} that tells it how to format
+numbers (or strings), and that there are a number of different ways in which
+numbers can be formatted. The different format specifications are discussed
+more fully in
+@ref{Control Letters, , Format-Control Letters}.
+
+The built-in variable @code{OFMT} contains the default format specification
+that @code{print} uses with @code{sprintf} when it wants to convert a
+number to a string for printing.
+The default value of @code{OFMT} is @code{"%.6g"}.
+By supplying different format specifications
+as the value of @code{OFMT}, you can change how @code{print} will print
+your numbers. As a brief example:
+
+@example
+@group
+$ awk 'BEGIN @{
+> OFMT = "%.0f" # print numbers as integers (rounds)
+> print 17.23 @}'
+@print{} 17
+@end group
+@end example
+
+@noindent
+@cindex dark corner
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+According to the POSIX standard, @code{awk}'s behavior will be undefined
+if @code{OFMT} contains anything but a floating point conversion specification
+(d.c.).
+
+@node Printf, Redirection, OFMT, Printing
+@section Using @code{printf} Statements for Fancier Printing
+@cindex formatted output
+@cindex output, formatted
+
+If you want more precise control over the output format than
+@code{print} gives you, use @code{printf}. With @code{printf} you can
+specify the width to use for each item, and you can specify various
+formatting choices for numbers (such as what radix to use, whether to
+print an exponent, whether to print a sign, and how many digits to print
+after the decimal point). You do this by supplying a string, called
+the @dfn{format string}, which controls how and where to print the other
+arguments.
+
+@menu
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
+@end menu
+
+@node Basic Printf, Control Letters, Printf, Printf
+@subsection Introduction to the @code{printf} Statement
+
+@cindex @code{printf} statement, syntax of
+The @code{printf} statement looks like this:
+
+@example
+printf @var{format}, @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of arguments may optionally be enclosed in parentheses. The
+parentheses are necessary if any of the item expressions use the @samp{>}
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+
+@cindex format string
+The difference between @code{printf} and @code{print} is the @var{format}
+argument. This is an expression whose value is taken as a string; it
+specifies how to output each of the other arguments. It is called
+the @dfn{format string}.
+
+The format string is very similar to that in the ANSI C library function
+@code{printf}. Most of @var{format} is text to be output verbatim.
+Scattered among this text are @dfn{format specifiers}, one per item.
+Each format specifier says to output the next item in the argument list
+at that place in the format.
+
+The @code{printf} statement does not automatically append a newline to its
+output. It outputs only what the format string specifies. So if you want
+a newline, you must include one in the format string. The output separator
+variables @code{OFS} and @code{ORS} have no effect on @code{printf}
+statements. For example:
+
+@example
+@group
+BEGIN @{
+ ORS = "\nOUCH!\n"; OFS = "!"
+ msg = "Don't Panic!"; printf "%s\n", msg
+@}
+@end group
+@end example
+
+This program still prints the familiar @samp{Don't Panic!} message.
+
+@node Control Letters, Format Modifiers, Basic Printf, Printf
+@subsection Format-Control Letters
+@cindex @code{printf}, format-control characters
+@cindex format specifier
+
+A format specifier starts with the character @samp{%} and ends with a
+@dfn{format-control letter}; it tells the @code{printf} statement how
+to output one item. (If you actually want to output a @samp{%}, write
+@samp{%%}.) The format-control letter specifies what kind of value to
+print. The rest of the format specifier is made up of optional
+@dfn{modifiers} which are parameters to use, such as the field width.
+
+Here is a list of the format-control letters:
+
+@table @code
+@item c
+This prints a number as an ASCII character. Thus, @samp{printf "%c",
+65} outputs the letter @samp{A}. The output for a string value is
+the first character of the string.
+
+@item d
+@itemx i
+These are equivalent. They both print a decimal integer.
+The @samp{%i} specification is for compatibility with ANSI C.
+
+@item e
+@itemx E
+This prints a number in scientific (exponential) notation.
+For example,
+
+@example
+printf "%4.3e\n", 1950
+@end example
+
+@noindent
+prints @samp{1.950e+03}, with a total of four significant figures of
+which three follow the decimal point. The @samp{4.3} are modifiers,
+discussed below. @samp{%E} uses @samp{E} instead of @samp{e} in the output.
+
+@item f
+This prints a number in floating point notation.
+For example,
+
+@example
+printf "%4.3f", 1950
+@end example
+
+@noindent
+prints @samp{1950.000}, with three digits following the decimal point
+and a minimum field width of four characters. The @samp{4.3} are modifiers,
+discussed below.
+
+@item g
+@itemx G
+This prints a number in either scientific notation or floating point
+notation, whichever uses fewer characters. If the result is printed in
+scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}.
+
+@item o
+This prints an unsigned octal integer.
+(In octal, or base-eight notation, the digits run from @samp{0} to @samp{7};
+the decimal number eight is represented as @samp{10} in octal.)
+
+@item s
+This prints a string.
+
+@item x
+@itemx X
+This prints an unsigned hexadecimal integer.
+(In hexadecimal, or base-16 notation, the digits are @samp{0} through @samp{9}
+and @samp{a} through @samp{f}. The hexadecimal digit @samp{f} represents
+the decimal number 15.) @samp{%X} uses the letters @samp{A} through @samp{F}
+instead of @samp{a} through @samp{f}.
+
+@item %
+This isn't really a format-control letter, but it does have a meaning
+when used after a @samp{%}: the sequence @samp{%%} outputs one
+@samp{%}. It does not consume an argument, and it ignores any
+modifiers.
+@end table
+
+@cindex dark corner
+When using the integer format-control letters for values that are outside
+the range of a C @code{long} integer, @code{gawk} will switch to the
+@samp{%g} format specifier. Other versions of @code{awk} may print
+invalid values, or do something else entirely (d.c.).
+
+@node Format Modifiers, Printf Examples, Control Letters, Printf
+@subsection Modifiers for @code{printf} Formats
+
+@cindex @code{printf}, modifiers
+@cindex modifiers (in format specifiers)
+A format specification can also include @dfn{modifiers} that can control
+how much of the item's value is printed and how much space it gets. The
+modifiers come between the @samp{%} and the format-control letter.
+In the examples below, we use the bullet symbol ``@bullet{}'' to represent
+spaces in the output. Here are the possible modifiers, in the order in
+which they may appear:
+
+@table @code
+@item -
+The minus sign, used before the width modifier (see below),
+says to left-justify
+the argument within its specified width. Normally the argument
+is printed right-justified in the specified width. Thus,
+
+@example
+printf "%-4s", "foo"
+@end example
+
+@noindent
+prints @samp{foo@bullet{}}.
+
+@item @var{space}
+For numeric conversions, prefix positive values with a space, and
+negative values with a minus sign.
+
+@item +
+The plus sign, used before the width modifier (see below),
+says to always supply a sign for numeric conversions, even if the data
+to be formatted is positive. The @samp{+} overrides the space modifier.
+
+@item #
+Use an ``alternate form'' for certain control letters.
+For @samp{%o}, supply a leading zero.
+For @samp{%x} and @samp{%X}, supply a leading @samp{0x} or @samp{0X} for
+a non-zero result.
+For @samp{%e}, @samp{%E}, and @samp{%f}, the result will always contain a
+decimal point.
+For @samp{%g} and @samp{%G}, trailing zeros are not removed from the result.
+
+@cindex dark corner
+@item 0
+A leading @samp{0} (zero) acts as a flag that indicates output should be
+padded with zeros instead of spaces.
+This applies even to non-numeric output formats (d.c.).
+This flag only has an effect when the field width is wider than the
+value to be printed.
+
+@item @var{width}
+This is a number specifying the desired minimum width of a field. Inserting any
+number between the @samp{%} sign and the format control character forces the
+field to be expanded to this width. The default way to do this is to
+pad with spaces on the left. For example,
+
+@example
+printf "%4s", "foo"
+@end example
+
+@noindent
+prints @samp{@bullet{}foo}.
+
+The value of @var{width} is a minimum width, not a maximum. If the item
+value requires more than @var{width} characters, it can be as wide as
+necessary. Thus,
+
+@example
+printf "%4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foobar}.
+
+Preceding the @var{width} with a minus sign causes the output to be
+padded with spaces on the right, instead of on the left.
+
+@item .@var{prec}
+This is a number that specifies the precision to use when printing.
+For the @samp{e}, @samp{E}, and @samp{f} formats, this specifies the
+number of digits you want printed to the right of the decimal point.
+For the @samp{g} and @samp{G} formats, it specifies the maximum number
+of significant digits. For the @samp{d}, @samp{o}, @samp{i}, @samp{u},
+@samp{x}, and @samp{X} formats, it specifies the minimum number of
+digits to print. For a string, it specifies the maximum number of
+characters from the string that should be printed. Thus,
+
+@example
+printf "%.4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foob}.
+@end table
+
+The C library @code{printf}'s dynamic @var{width} and @var{prec}
+capability (for example, @code{"%*.*s"}) is supported. Instead of
+supplying explicit @var{width} and/or @var{prec} values in the format
+string, you pass them in the argument list. For example:
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "%*.*s\n", w, p, s
+@end example
+
+@noindent
+is exactly equivalent to
+
+@example
+s = "abcdefg"
+printf "%5.3s\n", s
+@end example
+
+@noindent
+Both programs output @samp{@w{@bullet{}@bullet{}abc}}.
+
+Earlier versions of @code{awk} did not support this capability.
+If you must use such a version, you may simulate this feature by using
+concatenation to build up the format string, like so:
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "%" w "." p "s\n", s
+@end example
+
+@noindent
+This is not particularly easy to read, but it does work.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+C programmers may be used to supplying additional @samp{l} and @samp{h}
+flags in @code{printf} format strings. These are not valid in @code{awk}.
+Most @code{awk} implementations silently ignore these flags.
+If @samp{--lint} is provided on the command line
+(@pxref{Options, ,Command Line Options}),
+@code{gawk} will warn about their use. If @samp{--posix} is supplied,
+their use is a fatal error.
+
+@node Printf Examples, , Format Modifiers, Printf
+@subsection Examples Using @code{printf}
+
+Here is how to use @code{printf} to make an aligned table:
+
+@example
+awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end example
+
+@noindent
+prints the names of bulletin boards (@code{$1}) of the file
+@file{BBS-list} as a string of 10 characters, left justified. It also
+prints the phone numbers (@code{$2}) afterward on the line. This
+produces an aligned two-column table of names and phone numbers:
+
+@example
+@group
+$ awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@print{} aardvark   555-5553
+@print{} alpo-net   555-3412
+@print{} barfly     555-7685
+@print{} bites      555-1675
+@print{} camelot    555-0542
+@print{} core       555-2912
+@print{} fooey      555-1234
+@print{} foot       555-6699
+@print{} macfoo     555-6480
+@print{} sdace      555-3430
+@print{} sabafoo    555-2127
+@end group
+@end example
+
+Did you notice that we did not specify that the phone numbers be printed
+as numbers? They had to be printed as strings because the numbers are
+separated by a dash.
+If we had tried to print the phone numbers as numbers, all we would have
+gotten would have been the first three digits, @samp{555}.
+This would have been pretty confusing.
+
+We did not specify a width for the phone numbers because they are the
+last things on their lines. We don't need to put spaces after them.
+
+We could make our table look even nicer by adding headings to the tops
+of the columns. To do this, we use the @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns})
+to force the header to be printed only once, at the beginning of
+the @code{awk} program:
+
+@example
+@group
+awk 'BEGIN @{ print "Name       Number"
+             print "----       ------" @}
+     @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+Did you notice that we mixed @code{print} and @code{printf} statements in
+the above example? We could have used just @code{printf} statements to get
+the same results:
+
+@example
+@group
+awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
+ printf "%-10s %s\n", "----", "------" @}
+ @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+@noindent
+By printing each column heading with the same format specification
+used for the elements of the column, we have made sure that the headings
+are aligned just like the columns.
+
+The fact that the same format specification is used three times can be
+emphasized by storing it in a variable, like this:
+
+@example
+@group
+awk 'BEGIN @{ format = "%-10s %s\n"
+ printf format, "Name", "Number"
+ printf format, "----", "------" @}
+ @{ printf format, $1, $2 @}' BBS-list
+@end group
+@end example
+
+@c !!! exercise
+See if you can use the @code{printf} statement to line up the headings and
+table data for our @file{inventory-shipped} example covered earlier in the
+section on the @code{print} statement
+(@pxref{Print, ,The @code{print} Statement}).
+
+@node Redirection, Special Files, Printf, Printing
+@section Redirecting Output of @code{print} and @code{printf}
+
+@cindex output redirection
+@cindex redirection of output
+So far we have been dealing only with output that prints to the standard
+output, usually your terminal. Both @code{print} and @code{printf} can
+also send their output to other places.
+This is called @dfn{redirection}.
+
+A redirection appears after the @code{print} or @code{printf} statement.
+Redirections in @code{awk} are written just like redirections in shell
+commands, except that they are written inside the @code{awk} program.
+
+There are three forms of output redirection: output to a file,
+output appended to a file, and output through a pipe to another
+command.
+They are all shown for
+the @code{print} statement, but they work identically for @code{printf}
+also.
+
+@table @code
+@item print @var{items} > @var{output-file}
+This type of redirection prints the items into the output file
+@var{output-file}. The file name @var{output-file} can be any
+expression. Its value is changed to a string and then used as a
+file name (@pxref{Expressions}).
+
+When this type of redirection is used, the @var{output-file} is erased
+before the first output is written to it. Subsequent writes
+to the same @var{output-file} do not
+erase @var{output-file}, but append to it. If @var{output-file} does
+not exist, then it is created.
+
+For example, here is how an @code{awk} program can write a list of
+BBS names to a file @file{name-list} and a list of phone numbers to a
+file @file{phone-list}. Each output file contains one name or number
+per line.
+
+@example
+@group
+$ awk '@{ print $2 > "phone-list"
+> print $1 > "name-list" @}' BBS-list
+@end group
+@group
+$ cat phone-list
+@print{} 555-5553
+@print{} 555-3412
+@dots{}
+@end group
+@group
+$ cat name-list
+@print{} aardvark
+@print{} alpo-net
+@dots{}
+@end group
+@end example
+
+@item print @var{items} >> @var{output-file}
+This type of redirection prints the items into the pre-existing output file
+@var{output-file}. The difference between this and the
+single-@samp{>} redirection is that the old contents (if any) of
+@var{output-file} are not erased. Instead, the @code{awk} output is
+appended to the file.
+If @var{output-file} does not exist, then it is created.
+
+@cindex pipes for output
+@cindex output, piping
+@item print @var{items} | @var{command}
+It is also possible to send output to another program through a pipe
+instead of into a
+file. This type of redirection opens a pipe to @var{command} and writes
+the values of @var{items} through this pipe, to another process created
+to execute @var{command}.
+
+The redirection argument @var{command} is actually an @code{awk}
+expression. Its value is converted to a string, whose contents give the
+shell command to be run.
+
+For example, this produces two files, one unsorted list of BBS names
+and one list sorted in reverse alphabetical order:
+
+@example
+awk '@{ print $1 > "names.unsorted"
+ command = "sort -r > names.sorted"
+ print $1 | command @}' BBS-list
+@end example
+
+Here the unsorted list is written with an ordinary redirection while
+the sorted list is written by piping through the @code{sort} utility.
+
+This example uses redirection to mail a message to a mailing
+list @samp{bug-system}. This might be useful when trouble is encountered
+in an @code{awk} script run periodically for system maintenance.
+
+@example
+report = "mail bug-system"
+print "Awk script failed:", $0 | report
+m = ("at record number " FNR " of " FILENAME)
+print m | report
+close(report)
+@end example
+
+The message is built using string concatenation and saved in the variable
+@code{m}. It is then sent down the pipeline to the @code{mail} program.
+
+We call the @code{close} function here because it's a good idea to close
+the pipe as soon as all the intended output has been sent to it.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes},
+for more information
+on this. This example also illustrates the use of a variable to represent
+a @var{file} or @var{command}: it is not necessary to always
+use a string constant. Using a variable is generally a good idea,
+since @code{awk} requires you to spell the string value identically
+every time.
+@end table
+
+Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system
+to open a file or pipe only if the particular @var{file} or @var{command}
+you've specified has not already been written to by your program, or if
+it has been closed since it was last written to.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex limitations
+@cindex implementation limits
+@iftex
+As mentioned earlier
+(@pxref{Getline Summary, , Summary of @code{getline} Variants}),
+many
+@end iftex
+@ifinfo
+Many
+@end ifinfo
+@code{awk} implementations limit the number of pipelines an @code{awk}
+program may have open to just one! In @code{gawk}, there is no such limit.
+You can open as many pipelines as the underlying operating system will
+permit.
+
+@node Special Files, Close Files And Pipes , Redirection, Printing
+@section Special File Names in @code{gawk}
+@cindex standard input
+@cindex standard output
+@cindex standard error output
+@cindex file descriptors
+
+Running programs conventionally have three input and output streams
+already available to them for reading and writing. These are known as
+the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error
+output}. These streams are, by default, connected to your terminal, but
+they are often redirected with the shell, via the @samp{<}, @samp{<<},
+@samp{>}, @samp{>>}, @samp{>&} and @samp{|} operators. Standard error
+is typically used for writing error messages; the reason we have two separate
+streams, standard output and standard error, is so that they can be
+redirected separately.
+
+@cindex differences between @code{gawk} and @code{awk}
+In other implementations of @code{awk}, the only way to write an error
+message to standard error in an @code{awk} program is as follows:
+
+@example
+print "Serious error detected!" | "cat 1>&2"
+@end example
+
+@noindent
+This works by opening a pipeline to a shell command which can access the
+standard error stream which it inherits from the @code{awk} process.
+This is far from elegant, and is also inefficient, since it requires a
+separate process. So people writing @code{awk} programs often
+neglect to do this. Instead, they send the error messages to the
+terminal, like this:
+
+@example
+@group
+print "Serious error detected!" > "/dev/tty"
+@end group
+@end example
+
+@noindent
+This usually has the same effect, but not always: although the
+standard error stream is usually the terminal, it can be redirected, and
+when that happens, writing to the terminal is not correct. In fact, if
+@code{awk} is run from a background job, it may not have a terminal at all.
+Then opening @file{/dev/tty} will fail.
+
+@code{gawk} provides special file names for accessing the three standard
+streams. When you redirect input or output in @code{gawk}, if the file name
+matches one of these special names, then @code{gawk} directly uses the
+stream it stands for.
+
+@cindex @file{/dev/stdin}
+@cindex @file{/dev/stdout}
+@cindex @file{/dev/stderr}
+@cindex @file{/dev/fd}
+@c @cartouche
+@table @file
+@item /dev/stdin
+The standard input (file descriptor 0).
+
+@item /dev/stdout
+The standard output (file descriptor 1).
+
+@item /dev/stderr
+The standard error output (file descriptor 2).
+
+@item /dev/fd/@var{N}
+The file associated with file descriptor @var{N}. Such a file must have
+been opened by the program initiating the @code{awk} execution (typically
+the shell). Unless you take special pains in the shell from which
+you invoke @code{gawk}, only descriptors 0, 1 and 2 are available.
+@end table
+@c @end cartouche
+
+The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
+respectively, but they are more self-explanatory.
+
+The proper way to write an error message in a @code{gawk} program
+is to use @file{/dev/stderr}, like this:
+
+@example
+print "Serious error detected!" > "/dev/stderr"
+@end example
+
+@code{gawk} also provides special file names that give access to information
+about the running @code{gawk} process. Each of these ``files'' provides
+a single record of information. To read them more than once, you must
+first close them with the @code{close} function
+(@pxref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}).
+The file names are:
+
+@cindex process information
+@cindex @file{/dev/pid}
+@cindex @file{/dev/pgrpid}
+@cindex @file{/dev/ppid}
+@cindex @file{/dev/user}
+@c @cartouche
+@table @file
+@item /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with spaces. The fields represent the
+following information:
+
+@table @code
+@item $1
+The return value of the @code{getuid} system call
+(the real user ID number).
+
+@item $2
+The return value of the @code{geteuid} system call
+(the effective user ID number).
+
+@item $3
+The return value of the @code{getgid} system call
+(the real group ID number).
+
+@item $4
+The return value of the @code{getegid} system call
+(the effective group ID number).
+@end table
+
+If there are any additional fields, they are the group IDs returned by
+the @code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)
+@end table
+@c @end cartouche
+
+These special file names may be used on the command line as data
+files, as well as for I/O redirections within an @code{awk} program.
+They may not be used as source files with the @samp{-f} option.
+
+Recognition of these special file names is disabled if @code{gawk} is in
+compatibility mode (@pxref{Options, ,Command Line Options}).
+
+@strong{Caution}: Unless your system actually has a @file{/dev/fd} directory
+(or any of the other above listed special files),
+the interpretation of these file names is done by @code{gawk} itself.
+For example, using @samp{/dev/fd/4} for output will actually write on
+file descriptor 4, and not on a new file descriptor that was @code{dup}'ed
+from file descriptor 4. Most of the time this does not matter; however, it
+is important to @emph{not} close any of the files related to file descriptors
+0, 1, and 2. If you do close one of these files, unpredictable behavior
+will result.
+
+The special files that provide process-related information may disappear
+in a future version of @code{gawk}.
+@xref{Future Extensions, ,Probable Future Extensions}.
+
+@node Close Files And Pipes, , Special Files, Printing
+@section Closing Input and Output Files and Pipes
+@cindex closing input files and pipes
+@cindex closing output files and pipes
+@findex close
+
+If the same file name or the same shell command is used with
+@code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}})
+more than once during the execution of an @code{awk}
+program, the file is opened (or the command is executed) only the first time.
+At that time, the first record of input is read from that file or command.
+The next time the same file or command is used in @code{getline}, another
+record is read from it, and so on.
+
+Similarly, when a file or pipe is opened for output, the file name or command
+associated with
+it is remembered by @code{awk} and subsequent writes to the same file or
+command are appended to the previous writes. The file or pipe stays
+open until @code{awk} exits.
+
+This implies that if you want to start reading the same file again from
+the beginning, or if you want to rerun a shell command (rather than
+reading more output from the command), you must take special steps.
+What you must do is use the @code{close} function, as follows:
+
+@example
+close(@var{filename})
+@end example
+
+@noindent
+or
+
+@example
+close(@var{command})
+@end example
+
+The argument @var{filename} or @var{command} can be any expression. Its
+value must @emph{exactly} match the string that was used to open the file or
+start the command (spaces and other ``irrelevant'' characters
+included). For example, if you open a pipe with this:
+
+@example
+"sort -r names" | getline foo
+@end example
+
+@noindent
+then you must close it with this:
+
+@example
+close("sort -r names")
+@end example
+
+Once this function call is executed, the next @code{getline} from that
+file or command, or the next @code{print} or @code{printf} to that
+file or command, will reopen the file or rerun the command.
+
+Because the expression that you use to close a file or pipeline must
+exactly match the expression used to open the file or run the command,
+it is good practice to use a variable to store the file name or command.
+The previous example would become
+
+@example
+sortcom = "sort -r names"
+sortcom | getline foo
+@dots{}
+close(sortcom)
+@end example
+
+@noindent
+This helps avoid hard-to-find typographical errors in your @code{awk}
+programs.
+
+Here are some reasons why you might need to close an output file:
+
+@itemize @bullet
+@item
+To write a file and read it back later on in the same @code{awk}
+program. Close the file when you are finished writing it; then
+you can start reading it with @code{getline}.
+
+@item
+To write numerous files, successively, in the same @code{awk}
+program. If you don't close the files, eventually you may exceed a
+system limit on the number of open files in one process. So close
+each one when you are finished writing it.
+
+@item
+To make a command finish. When you redirect output through a pipe,
+the command reading the pipe normally continues to try to read input
+as long as the pipe is open. Often this means the command cannot
+really do its work until the pipe is closed. For example, if you
+redirect output to the @code{mail} program, the message is not
+actually sent until the pipe is closed.
+
+@item
+To run the same program a second time, with the same arguments.
+This is not the same thing as giving more input to the first run!
+
+For example, suppose you pipe output to the @code{mail} program. If you
+output several lines redirected to this pipe without closing it, they make
+a single message of several lines. By contrast, if you close the pipe
+after each line of output, then each line makes a separate message.
+@end itemize
+
+@vindex ERRNO
+@cindex differences between @code{gawk} and @code{awk}
+@code{close} returns a value of zero if the close succeeded.
+Otherwise, the value will be non-zero.
+When the close fails, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex portability issues
+If you use more files than the system allows you to have open,
+@code{gawk} will attempt to multiplex the available open files among
+your data files. @code{gawk}'s ability to do this depends upon the
+facilities of your operating system: it may not always work. It is
+therefore both good practice and good portability advice to always
+use @code{close} on your files when you are done with them.
+
+@node Expressions, Patterns and Actions, Printing, Top
+@chapter Expressions
+@cindex expression
+
+Expressions are the basic building blocks of @code{awk} patterns
+and actions. An expression evaluates to a value, which you can print, test,
+store in a variable or pass to a function. Additionally, an expression
+can assign a new value to a variable or a field, with an assignment operator.
+
+An expression can serve as a pattern or action statement on its own.
+Most other kinds of
+statements contain one or more expressions which specify data on which to
+operate. As in other languages, expressions in @code{awk} include
+variables, array references, constants, and function calls, as well as
+combinations of these with various operators.
+
+@menu
+* Constants:: String, numeric, and regexp constants.
+* Using Constant Regexps:: When and how to use a regexp constant.
+* Variables:: Variables give names to values for later use.
+* Conversion:: The conversion of strings to numbers and vice
+ versa.
+* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-},
+ etc.)
+* Concatenation:: Concatenating strings.
+* Assignment Ops:: Changing the value of a variable or a field.
+* Increment Ops:: Incrementing the numeric value of a variable.
+* Truth Values:: What is ``true'' and what is ``false''.
+* Typing and Comparison:: How variables acquire types, and how this
+ affects comparison of numbers and strings with
+ @samp{<}, etc.
+* Boolean Ops:: Combining comparison expressions using boolean
+ operators @samp{||} (``or''), @samp{&&}
+ (``and'') and @samp{!} (``not'').
+* Conditional Exp:: Conditional expressions select between two
+ subexpressions under control of a third
+ subexpression.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
+@end menu
+
+@node Constants, Using Constant Regexps, Expressions, Expressions
+@section Constant Expressions
+@cindex constants, types of
+@cindex string constants
+
+The simplest type of expression is the @dfn{constant}, which always has
+the same value. There are three types of constants: numeric constants,
+string constants, and regular expression constants.
+
+@menu
+* Scalar Constants:: Numeric and string constants.
+* Regexp Constants:: Regular Expression constants.
+@end menu
+
+@node Scalar Constants, Regexp Constants, Constants, Constants
+@subsection Numeric and String Constants
+
+@cindex numeric constant
+@cindex numeric value
+A @dfn{numeric constant} stands for a number. This number can be an
+integer, a decimal fraction, or a number in scientific (exponential)
+notation.@footnote{The internal representation uses double-precision
+floating point numbers. If you don't know what that means, then don't
+worry about it.} Here are some examples of numeric constants, which all
+have the same value:
+
+@example
+105
+1.05e+2
+1050e-1
+@end example
+
+A string constant consists of a sequence of characters enclosed in
+double-quote marks. For example:
+
+@example
+"parrot"
+@end example
+
+@noindent
+@cindex differences between @code{gawk} and @code{awk}
+represents the string whose contents are @samp{parrot}. Strings in
+@code{gawk} can be of any length and they can contain any of the possible
+eight-bit ASCII characters including ASCII NUL (character code zero).
+Other @code{awk}
+implementations may have difficulty with some character codes.
+
+@node Regexp Constants, , Scalar Constants, Constants
+@subsection Regular Expression Constants
+
+@cindex @code{~} operator
+@cindex @code{!~} operator
+A regexp constant is a regular expression description enclosed in
+slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
+@code{awk} programs are constant, but the @samp{~} and @samp{!~}
+matching operators can also match computed or ``dynamic'' regexps
+(which are just ordinary strings or variables that contain a regexp).
+
+@node Using Constant Regexps, Variables, Constants, Expressions
+@section Using Regular Expression Constants
+
+When used on the right hand side of the @samp{~} or @samp{!~}
+operators, a regexp constant merely stands for the regexp that is to be
+matched.
+
+@cindex dark corner
+Regexp constants (such as @code{/foo/}) may be used like simple expressions.
+When a
+regexp constant appears by itself, it has the same meaning as if it appeared
+in a pattern, i.e.@: @samp{($0 ~ /foo/)} (d.c.)
+(@pxref{Expression Patterns, ,Expressions as Patterns}).
+This means that the two code segments,
+
+@example
+if ($0 ~ /barfly/ || $0 ~ /camelot/)
+ print "found"
+@end example
+
+@noindent
+and
+
+@example
+if (/barfly/ || /camelot/)
+ print "found"
+@end example
+
+@noindent
+are exactly equivalent.
+
+One rather bizarre consequence of this rule is that the following
+boolean expression is valid, but does not do what the user probably
+intended:
+
+@example
+# note that /foo/ is on the left of the ~
+if (/foo/ ~ $1) print "found foo"
+@end example
+
+@noindent
+This code is ``obviously'' testing @code{$1} for a match against the regexp
+@code{/foo/}. But in fact, the expression @samp{/foo/ ~ $1} actually means
+@samp{($0 ~ /foo/) ~ $1}. In other words, first match the input record
+against the regexp @code{/foo/}. The result will be either one or zero,
+depending upon the success or failure of the match. Then match that result
+against the first field in the record.
+
+Since it is unlikely that you would ever really wish to make this kind of
+test, @code{gawk} will issue a warning when it sees this construct in
+a program.
+
+Another consequence of this rule is that the assignment statement
+
+@example
+matches = /foo/
+@end example
+
+@noindent
+will assign either zero or one to the variable @code{matches}, depending
+upon the contents of the current input record.
+
+This feature of the language was never well documented until the
+POSIX specification.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex dark corner
+Constant regular expressions are also used as the first argument for
+the @code{gensub}, @code{sub} and @code{gsub} functions, and as the
+second argument of the @code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Modern implementations of @code{awk}, including @code{gawk}, allow
+the third argument of @code{split} to be a regexp constant, while some
+older implementations do not (d.c.).
+
+Because the built-in functions accept regexp constants as arguments,
+confusion can arise when attempting to use regexp constants
+as arguments to user-defined functions
+(@pxref{User-defined, , User-defined Functions}).
+For example:
+
+@example
+@group
+function mysub(pat, repl, str, global)
+@{
+ if (global)
+ gsub(pat, repl, str)
+ else
+ sub(pat, repl, str)
+ return str
+@}
+@end group
+
+@group
+@{
+ @dots{}
+ text = "hi! hi yourself!"
+ mysub(/hi/, "howdy", text, 1)
+ @dots{}
+@}
+@end group
+@end example
+
+In this example, the programmer wishes to pass a regexp constant to the
+user-defined function @code{mysub}, which will in turn pass it on to
+either @code{sub} or @code{gsub}. However, what really happens is that
+the @code{pat} parameter will be either one or zero, depending upon whether
+or not @code{$0} matches @code{/hi/}.
+
+As it is unlikely that you would ever really wish to pass a truth value
+in this way, @code{gawk} will issue a warning when it sees a regexp
+constant used as a parameter to a user-defined function.
+
+@node Variables, Conversion, Using Constant Regexps, Expressions
+@section Variables
+
+Variables are ways of storing values at one point in your program for
+use later in another part of your program. You can manipulate them
+entirely within your program text, and you can also assign values to
+them on the @code{awk} command line.
+
+@menu
+* Using Variables:: Using variables in your programs.
+* Assignment Options:: Setting variables on the command line and a
+ summary of command line syntax. This is an
+ advanced method of input.
+@end menu
+
+@node Using Variables, Assignment Options, Variables, Variables
+@subsection Using Variables in a Program
+
+@cindex variables, user-defined
+@cindex user-defined variables
+Variables let you give names to values and refer to them later. You have
+already seen variables in many of the examples. The name of a variable
+must be a sequence of letters, digits and underscores, but it may not begin
+with a digit. Case is significant in variable names; @code{a} and @code{A}
+are distinct variables.
+
+A variable name is a valid expression by itself; it represents the
+variable's current value. Variables are given new values with
+@dfn{assignment operators}, @dfn{increment operators} and
+@dfn{decrement operators}.
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+A few variables have special built-in meanings, such as @code{FS}, the
+field separator, and @code{NF}, the number of fields in the current
+input record. @xref{Built-in Variables}, for a list of them. These
+built-in variables can be used and assigned just like all other
+variables, but their values are also used or changed automatically by
+@code{awk}. All built-in variable names are entirely upper-case.
+
+Variables in @code{awk} can be assigned either numeric or string
+values. By default, variables are initialized to the empty string, which
+is zero if converted to a number. There is no need to
+``initialize'' each variable explicitly in @code{awk},
+the way you would in C and in most other traditional languages.
+
+@node Assignment Options, , Using Variables, Variables
+@subsection Assigning Variables on the Command Line
+
+You can set any @code{awk} variable by including a @dfn{variable assignment}
+among the arguments on the command line when you invoke @code{awk}
+(@pxref{Other Arguments, ,Other Command Line Arguments}). Such an assignment has
+this form:
+
+@example
+@var{variable}=@var{text}
+@end example
+
+@noindent
+With it, you can set a variable either at the beginning of the
+@code{awk} run or in between input files.
+
+If you precede the assignment with the @samp{-v} option, like this:
+
+@example
+-v @var{variable}=@var{text}
+@end example
+
+@noindent
+then the variable is set at the very beginning, before even the
+@code{BEGIN} rules are run. The @samp{-v} option and its assignment
+must precede all the file name arguments, as well as the program text.
+(@xref{Options, ,Command Line Options}, for more information about
+the @samp{-v} option.)
+
+Otherwise, the variable assignment is performed at a time determined by
+its position among the input file arguments: after the processing of the
+preceding input file argument. For example:
+
+@example
+awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
+@end example
+
+@noindent
+prints the value of field number @code{n} for all input records. Before
+the first file is read, the command line sets the variable @code{n}
+equal to four. This causes the fourth field to be printed in lines from
+the file @file{inventory-shipped}. After the first file has finished,
+but before the second file is started, @code{n} is set to two, so that the
+second field is printed in lines from @file{BBS-list}.
+
+@example
+@group
+$ awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
+@print{} 15
+@print{} 24
+@dots{}
+@print{} 555-5553
+@print{} 555-3412
+@dots{}
+@end group
+@end example
+
+Command line arguments are made available for explicit examination by
+the @code{awk} program in an array named @code{ARGV}
+(@pxref{ARGC and ARGV, ,Using @code{ARGC} and @code{ARGV}}).
+
+@cindex dark corner
+@code{awk} processes the values of command line assignments for escape
+sequences (d.c.) (@pxref{Escape Sequences}).
+
+@node Conversion, Arithmetic Ops, Variables, Expressions
+@section Conversion of Strings and Numbers
+
+@cindex conversion of strings and numbers
+Strings are converted to numbers, and numbers to strings, if the context
+of the @code{awk} program demands it. For example, if the value of
+either @code{foo} or @code{bar} in the expression @samp{foo + bar}
+happens to be a string, it is converted to a number before the addition
+is performed. If numeric values appear in string concatenation, they
+are converted to strings. Consider this:
+
+@example
+two = 2; three = 3
+print (two three) + 4
+@end example
+
+@noindent
+This prints the (numeric) value 27. The numeric values of
+the variables @code{two} and @code{three} are converted to strings and
+concatenated together, and the resulting string is converted back to the
+number 23, to which four is then added.
+
+@cindex null string
+@cindex empty string
+@cindex type conversion
+If, for some reason, you need to force a number to be converted to a
+string, concatenate the empty string, @code{""}, with that number.
+To force a string to be converted to a number, add zero to that string.
+
+A string is converted to a number by interpreting any numeric prefix
+of the string as numerals:
+@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"}
+has a numeric value of 25.
+Strings that can't be interpreted as valid numbers are converted to
+zero.
+
+@vindex CONVFMT
+The exact manner in which numbers are converted into strings is controlled
+by the @code{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}).
+Numbers are converted using the @code{sprintf} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation})
+with @code{CONVFMT} as the format
+specifier.
+
+@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with
+at most six significant digits. For some applications you will want to
+change it to specify more precision. Double precision on most modern
+machines gives you 16 or 17 decimal digits of precision.
+
+Strange results can happen if you set @code{CONVFMT} to a string that doesn't
+tell @code{sprintf} how to format floating point numbers in a useful way.
+For example, if you forget the @samp{%} in the format, all numbers will be
+converted to the same constant string.
+
+@cindex dark corner
+As a special case, if a number is an integer, then the result of converting
+it to a string is @emph{always} an integer, no matter what the value of
+@code{CONVFMT} may be. Given the following code fragment:
+
+@example
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+@end example
+
+@noindent
+@code{b} has the value @code{"12"}, not @code{"12.00"} (d.c.).
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@vindex OFMT
+Prior to the POSIX standard, @code{awk} specified that the value
+of @code{OFMT} was used for converting numbers to strings. @code{OFMT}
+specifies the output format to use when printing numbers with @code{print}.
+@code{CONVFMT} was introduced in order to separate the semantics of
+conversion from the semantics of printing. Both @code{CONVFMT} and
+@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
+of cases, old @code{awk} programs will not change their behavior.
+However, this use of @code{OFMT} is something to keep in mind if you must
+port your program to other implementations of @code{awk}; we recommend
+that instead of changing your programs, you just port @code{gawk} itself!
+@xref{Print, ,The @code{print} Statement},
+for more information on the @code{print} statement.
+
+@node Arithmetic Ops, Concatenation, Conversion, Expressions
+@section Arithmetic Operators
+@cindex arithmetic operators
+@cindex operators, arithmetic
+@cindex addition
+@cindex subtraction
+@cindex multiplication
+@cindex division
+@cindex remainder
+@cindex quotient
+@cindex exponentiation
+
+The @code{awk} language uses the common arithmetic operators when
+evaluating expressions. All of these arithmetic operators follow normal
+precedence rules, and work as you would expect them to.
+
+Here is a file @file{grades} containing a list of student names and
+three test scores per student (it's a small class):
+
+@example
+Pat 100 97 58
+Sandy 84 72 93
+Chris 72 92 89
+@end example
+
+@noindent
+This program takes the file @file{grades}, and prints the average
+of the scores.
+
+@example
+$ awk '@{ sum = $2 + $3 + $4 ; avg = sum / 3
+> print $1, avg @}' grades
+@print{} Pat 85
+@print{} Sandy 83
+@print{} Chris 84.3333
+@end example
+
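+This table lists the arithmetic operators in @code{awk}, roughly in order
+from highest precedence to lowest (exponentiation actually binds more
+tightly than unary plus and minus, so @samp{-2 ^ 2} has the value -4):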
+
+@c @cartouche
+@table @code
+@item - @var{x}
+Negation.
+
+@item + @var{x}
+Unary plus. The expression is converted to a number.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item @var{x} ^ @var{y}
+@itemx @var{x} ** @var{y}
+Exponentiation: @var{x} raised to the @var{y} power. @samp{2 ^ 3} has
+the value eight. The character sequence @samp{**} is equivalent to
+@samp{^}. (The POSIX standard only specifies the use of @samp{^}
+for exponentiation.)
+
+@item @var{x} * @var{y}
+Multiplication.
+
+@item @var{x} / @var{y}
+Division. Since all numbers in @code{awk} are
+real numbers, the result is not rounded to an integer: @samp{3 / 4}
+has the value 0.75.
+
+@item @var{x} % @var{y}
+@cindex differences between @code{gawk} and @code{awk}
+Remainder. The quotient is rounded toward zero to an integer,
+multiplied by @var{y} and this result is subtracted from @var{x}.
+This operation is sometimes known as ``trunc-mod.'' The following
+relation always holds:
+
+@example
+b * int(a / b) + (a % b) == a
+@end example
+
+One possibly undesirable effect of this definition of remainder is that
+@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus,
+
+@example
+-17 % 8 = -1
+@end example
+
+In other @code{awk} implementations, the signedness of the remainder
+may be machine dependent.
+@c !!! what does posix say?
+
+@item @var{x} + @var{y}
+Addition.
+
+@item @var{x} - @var{y}
+Subtraction.
+@end table
+@c @end cartouche
+
+For maximum portability, do not use the @samp{**} operator.
+
+Unary plus and minus have the same precedence,
+the multiplication operators all have the same precedence, and
+addition and subtraction have the same precedence.
+
+@node Concatenation, Assignment Ops, Arithmetic Ops, Expressions
+@section String Concatenation
+@cindex Kernighan, Brian
+@display
+@i{It seemed like a good idea at the time.}
+Brian Kernighan
+@end display
+@sp 1
+
+@cindex string operators
+@cindex operators, string
+@cindex concatenation
+There is only one string operation: concatenation. It does not have a
+specific operator to represent it. Instead, concatenation is performed by
+writing expressions next to one another, with no operator. For example:
+
+@example
+@group
+$ awk '@{ print "Field number one: " $1 @}' BBS-list
+@print{} Field number one: aardvark
+@print{} Field number one: alpo-net
+@dots{}
+@end group
+@end example
+
+Without the space in the string constant after the @samp{:}, the line
+would run together. For example:
+
+@example
+@group
+$ awk '@{ print "Field number one:" $1 @}' BBS-list
+@print{} Field number one:aardvark
+@print{} Field number one:alpo-net
+@dots{}
+@end group
+@end example
+
+Since string concatenation does not have an explicit operator, it is
+often necessary to ensure that it happens where you want it to by
+using parentheses to enclose
+the items to be concatenated. For example, the
+following code fragment does not concatenate @code{file} and @code{name}
+as you might expect:
+
+@example
+@group
+file = "file"
+name = "name"
+print "something meaningful" > file name
+@end group
+@end example
+
+@noindent
+It is necessary to use the following:
+
+@example
+print "something meaningful" > (file name)
+@end example
+
+We recommend that you use parentheses around concatenation in all but the
+most common contexts (such as on the right-hand side of @samp{=}).
+
+@node Assignment Ops, Increment Ops, Concatenation, Expressions
+@section Assignment Expressions
+@cindex assignment operators
+@cindex operators, assignment
+@cindex expression, assignment
+
+An @dfn{assignment} is an expression that stores a new value into a
+variable. For example, let's assign the value one to the variable
+@code{z}:
+
+@example
+z = 1
+@end example
+
+After this expression is executed, the variable @code{z} has the value one.
+Whatever old value @code{z} had before the assignment is forgotten.
+
+Assignments can store string values also. For example, this would store
+the value @code{"this food is good"} in the variable @code{message}:
+
+@example
+thing = "food"
+predicate = "good"
+message = "this " thing " is " predicate
+@end example
+
+@noindent
+(This also illustrates string concatenation.)
+
+The @samp{=} sign is called an @dfn{assignment operator}. It is the
+simplest assignment operator because the value of the right-hand
+operand is stored unchanged.
+
+@cindex side effect
+Most operators (addition, concatenation, and so on) have no effect
+except to compute a value. If you ignore the value, you might as well
+not use the operator. An assignment operator is different; it does
+produce a value, but even if you ignore the value, the assignment still
+makes itself felt through the alteration of the variable. We call this
+a @dfn{side effect}.
+
+@cindex lvalue
+@cindex rvalue
+The left-hand operand of an assignment need not be a variable
+(@pxref{Variables}); it can also be a field
+(@pxref{Changing Fields, ,Changing the Contents of a Field}) or
+an array element (@pxref{Arrays, ,Arrays in @code{awk}}).
+These are all called @dfn{lvalues},
+which means they can appear on the left-hand side of an assignment operator.
+The right-hand operand may be any expression; it produces the new value
+which the assignment stores in the specified variable, field or array
+element. (Such values are called @dfn{rvalues}.)
+
+@cindex types of variables
+It is important to note that variables do @emph{not} have permanent types.
+The type of a variable is simply the type of whatever value it happens
+to hold at the moment. In the following program fragment, the variable
+@code{foo} has a numeric value at first, and a string value later on:
+
+@example
+@group
+foo = 1
+print foo
+foo = "bar"
+print foo
+@end group
+@end example
+
+@noindent
+When the second assignment gives @code{foo} a string value, the fact that
+it previously had a numeric value is forgotten.
+
+String values that have no numeric prefix have a numeric value of
+zero. After executing this code, the value of @code{foo} is five:
+
+@example
+foo = "a string"
+foo = foo + 5
+@end example
+
+@noindent
+(Note that using a variable as a number and then later as a string can
+be confusing and is poor programming style. The above examples illustrate how
+@code{awk} works, @emph{not} how you should write your own programs!)
+
+An assignment is an expression, so it has a value: the same value that
+is assigned. Thus, @samp{z = 1} as an expression has the value one.
+One consequence of this is that you can write multiple assignments together:
+
+@example
+x = y = z = 0
+@end example
+
+@noindent
+stores the value zero in all three variables. It does this because the
+value of @samp{z = 0}, which is zero, is stored into @code{y}, and then
+the value of @samp{y = z = 0}, which is zero, is stored into @code{x}.
+
+You can use an assignment anywhere an expression is called for. For
+example, it is valid to write @samp{x != (y = 1)} to set @code{y} to one
+and then test whether @code{x} differs from one. But this style tends to make
+programs hard to read; except in a one-shot program, you should
+not use such nesting of assignments.
+
+Aside from @samp{=}, there are several other assignment operators that
+do arithmetic with the old value of the variable. For example, the
+operator @samp{+=} computes a new value by adding the right-hand value
+to the old value of the variable. Thus, the following assignment adds
+five to the value of @code{foo}:
+
+@example
+foo += 5
+@end example
+
+@noindent
+This is equivalent to the following:
+
+@example
+foo = foo + 5
+@end example
+
+@noindent
+Use whichever one makes the meaning of your program clearer.
+
+There are situations where using @samp{+=} (or any assignment operator)
+is @emph{not} the same as simply repeating the left-hand operand in the
+right-hand expression. For example:
+
+@cindex Rankin, Pat
+@example
+@group
+# Thanks to Pat Rankin for this example
+BEGIN @{
+ foo[rand()] += 5
+ for (x in foo)
+ print x, foo[x]
+
+ bar[rand()] = bar[rand()] + 5
+ for (x in bar)
+ print x, bar[x]
+@}
+@end group
+@end example
+
+@noindent
+The indices of @code{bar} are practically guaranteed to be different, because
+@code{rand} returns a different value each time it is called.
+(Arrays and the @code{rand} function haven't been covered yet.
+@xref{Arrays, ,Arrays in @code{awk}},
+and see @ref{Numeric Functions, ,Numeric Built-in Functions}, for more information.)
+This example illustrates an important fact about the assignment
+operators: the left-hand expression is only evaluated @emph{once}.
+
+It is also up to the implementation as to which expression is evaluated
+first, the left-hand one or the right-hand one.
+Consider this example:
+
+@example
+i = 1
+a[i += 2] = i + 1
+@end example
+
+@noindent
+The value of @code{a[3]} could be either two or four.
+
+Here is a table of the arithmetic assignment operators. In each
+case, the right-hand operand is an expression whose value is converted
+to a number.
+
+@c @cartouche
+@table @code
+@item @var{lvalue} += @var{increment}
+Adds @var{increment} to the value of @var{lvalue} to make the new value
+of @var{lvalue}.
+
+@item @var{lvalue} -= @var{decrement}
+Subtracts @var{decrement} from the value of @var{lvalue}.
+
+@item @var{lvalue} *= @var{coefficient}
+Multiplies the value of @var{lvalue} by @var{coefficient}.
+
+@item @var{lvalue} /= @var{divisor}
+Divides the value of @var{lvalue} by @var{divisor}.
+
+@item @var{lvalue} %= @var{modulus}
+Sets @var{lvalue} to its remainder by @var{modulus}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item @var{lvalue} ^= @var{power}
+@itemx @var{lvalue} **= @var{power}
+Raises @var{lvalue} to the power @var{power}.
+(Only the @samp{^=} operator is specified by POSIX.)
+@end table
+@c @end cartouche
+
+For maximum portability, do not use the @samp{**=} operator.
+
+@node Increment Ops, Truth Values, Assignment Ops, Expressions
+@section Increment and Decrement Operators
+
+@cindex increment operators
+@cindex operators, increment
+@dfn{Increment} and @dfn{decrement operators} increase or decrease the value of
+a variable by one. You could do the same thing with an assignment operator, so
+the increment operators add no power to the @code{awk} language; but they
+are convenient abbreviations for very common operations.
+
+The operator to add one is written @samp{++}. It can be used to increment
+a variable either before or after taking its value.
+
+To pre-increment a variable @var{v}, write @samp{++@var{v}}. This adds
+one to the value of @var{v} and that new value is also the value of this
+expression. The assignment expression @samp{@var{v} += 1} is completely
+equivalent.
+
+Writing the @samp{++} after the variable specifies post-increment. This
+increments the variable value just the same; the difference is that the
+value of the increment expression itself is the variable's @emph{old}
+value. Thus, if @code{foo} has the value four, then the expression @samp{foo++}
+has the value four, but it changes the value of @code{foo} to five.
+
+The post-increment @samp{foo++} is nearly equivalent to writing @samp{(foo
++= 1) - 1}. It is not perfectly equivalent because all numbers in
+@code{awk} are floating point: in floating point, @samp{foo + 1 - 1} does
+not necessarily equal @code{foo}. But the difference is minute as
+long as you stick to numbers that are fairly small (less than 10e12).
+
+Any lvalue can be incremented. Fields and array elements are incremented
+just like variables. (Use @samp{$(i++)} when you wish to do a field reference
+and a variable increment at the same time. The parentheses are necessary
+because of the precedence of the field reference operator, @samp{$}.)
+
+@cindex decrement operators
+@cindex operators, decrement
+The decrement operator @samp{--} works just like @samp{++} except that
+it subtracts one instead of adding. Like @samp{++}, it can be used before
+the lvalue to pre-decrement or after it to post-decrement.
+
+Here is a summary of increment and decrement expressions.
+
+@c @cartouche
+@table @code
+@item ++@var{lvalue}
+This expression increments @var{lvalue} and the new value becomes the
+value of the expression.
+
+@item @var{lvalue}++
+This expression increments @var{lvalue}, but
+the value of the expression is the @emph{old} value of @var{lvalue}.
+
+@item --@var{lvalue}
+Like @samp{++@var{lvalue}}, but instead of adding, it subtracts. It
+decrements @var{lvalue} and delivers the value that results.
+
+@item @var{lvalue}--
+Like @samp{@var{lvalue}++}, but instead of adding, it subtracts. It
+decrements @var{lvalue}. The value of the expression is the @emph{old}
+value of @var{lvalue}.
+@end table
+@c @end cartouche
+
+@node Truth Values, Typing and Comparison, Increment Ops, Expressions
+@section True and False in @code{awk}
+@cindex truth values
+@cindex logical true
+@cindex logical false
+
+Many programming languages have a special representation for the concepts
+of ``true'' and ``false.'' Such languages usually use the special
+constants @code{true} and @code{false}, or perhaps their upper-case
+equivalents.
+
+@cindex null string
+@cindex empty string
+@code{awk} is different. It borrows a very simple concept of true and
+false from C. In @code{awk}, any non-zero numeric value, @emph{or} any
+non-empty string value is true. Any other value (zero or the null
+string, @code{""}) is false. The following program will print @samp{A strange
+truth value} three times:
+
+@example
+@group
+BEGIN @{
+ if (3.1415927)
+ print "A strange truth value"
+ if ("Four Score And Seven Years Ago")
+ print "A strange truth value"
+ if (j = 57)
+ print "A strange truth value"
+@}
+@end group
+@end example
+
+@cindex dark corner
+There is a surprising consequence of the ``non-zero or non-null'' rule:
+The string constant @code{"0"} is actually true, since it is non-null (d.c.).
+
+@node Typing and Comparison, Boolean Ops, Truth Values, Expressions
+@section Variable Typing and Comparison Expressions
+@cindex comparison expressions
+@cindex expression, comparison
+@cindex expression, matching
+@cindex relational operators
+@cindex operators, relational
+@cindex regexp match/non-match operators
+@cindex variable typing
+@cindex types of variables
+@c 2e: consider splitting this section into subsections
+@display
+@i{The Guide is definitive. Reality is frequently inaccurate.}
+The Hitchhiker's Guide to the Galaxy
+@end display
+@sp 1
+
+Unlike in other programming languages, @code{awk} variables do not have a
+fixed type. Instead, they can be either a number or a string, depending
+upon the value that is assigned to them.
+
+@cindex numeric string
+The 1992 POSIX standard introduced
+the concept of a @dfn{numeric string}, which is simply a string that looks
+like a number, for example, @code{@w{" +2"}}. This concept is used
+for determining the type of a variable.
+
+The type of the variable is important, since the types of two variables
+determine how they are compared.
+
+In @code{gawk}, variable typing follows these rules.
+
+@enumerate 1
+@item
+A numeric literal or the result of a numeric operation has the @var{numeric}
+attribute.
+
+@item
+A string literal or the result of a string operation has the @var{string}
+attribute.
+
+@item
+Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
+@code{ENVIRON} elements and the
+elements of an array created by @code{split} that are numeric strings
+have the @var{strnum} attribute. Otherwise, they have the @var{string}
+attribute.
+Uninitialized variables also have the @var{strnum} attribute.
+
+@item
+Attributes propagate across assignments, but are not changed by
+any use.
+@c (Although a use may cause the entity to acquire an additional
+@c value such that it has both a numeric and string value -- this leaves the
+@c attribute unchanged.)
+@c This is important but not relevant
+@end enumerate
+
+The last rule is particularly important. In the following program,
+@code{a} has numeric type, even though it is later used in a string
+operation.
+
+@example
+BEGIN @{
+ a = 12.345
+ b = a " is a cute number"
+ print b
+@}
+@end example
+
+When two operands are compared, either string comparison or numeric comparison
+may be used, depending on the attributes of the operands, according to the
+following, symmetric, matrix:
+
+@c thanks to Karl Berry, kb@cs.umb.edu, for major help with TeX tables
+@tex
+\centerline{
+\vbox{\bigskip % space above the table (about 1 linespace)
+% Because we have vertical rules, we can't let TeX insert interline space
+% in its usual way.
+\offinterlineskip
+%
+% Define the table template. & separates columns, and \cr ends the
+% template (and each row). # is replaced by the text of that entry on
+% each row. The template for the first column breaks down like this:
+% \strut -- a way to make each line have the height and depth
+% of a normal line of type, since we turned off interline spacing.
+% \hfil -- infinite glue; has the effect of right-justifying in this case.
+% # -- replaced by the text (for instance, `STRNUM', in the last row).
+% \quad -- about the width of an `M'. Just separates the columns.
+%
+% The second column (\vrule#) is what generates the vertical rule that
+% spans table rows.
+%
+% The doubled && before the next entry means `repeat the following
+% template as many times as necessary on each line' -- in our case, twice.
+%
+% The template itself, \quad#\hfil, left-justifies with a little space before.
+%
+\halign{\strut\hfil#\quad&\vrule#&&\quad#\hfil\cr
+ &&STRING &NUMERIC &STRNUM\cr
+% The \omit tells TeX to skip inserting the template for this column on
+% this particular row. In this case, we only want a little extra space
+% to separate the heading row from the rule below it. the depth 2pt --
+% `\vrule depth 2pt' is that little space.
+\omit &depth 2pt\cr
+% This is the horizontal rule below the heading. Since it has nothing to
+% do with the columns of the table, we use \noalign to get it in there.
+\noalign{\hrule}
+% Like above, this time a little more space.
+\omit &depth 4pt\cr
+% The remaining rows have nothing special about them.
+STRING &&string &string &string\cr
+NUMERIC &&string &numeric &numeric\cr
+STRNUM &&string &numeric &numeric\cr
+}}}
+@end tex
+@ifinfo
+@display
+ +----------------------------------------------
+ | STRING NUMERIC STRNUM
+--------+----------------------------------------------
+ |
+STRING | string string string
+ |
+NUMERIC | string numeric numeric
+ |
+STRNUM | string numeric numeric
+--------+----------------------------------------------
+@end display
+@end ifinfo
+
+The basic idea is that user input that looks numeric, and @emph{only}
+user input, should be treated as numeric, even though it is actually
+made of characters, and is therefore also a string.
+
+@dfn{Comparison expressions} compare strings or numbers for
+relationships such as equality. They are written using @dfn{relational
+operators}, which are a superset of those in C. Here is a table of
+them:
+
+@cindex relational operators
+@cindex operators, relational
+@cindex @code{<} operator
+@cindex @code{<=} operator
+@cindex @code{>} operator
+@cindex @code{>=} operator
+@cindex @code{==} operator
+@cindex @code{!=} operator
+@cindex @code{~} operator
+@cindex @code{!~} operator
+@cindex @code{in} operator
+@c @cartouche
+@table @code
+@item @var{x} < @var{y}
+True if @var{x} is less than @var{y}.
+
+@item @var{x} <= @var{y}
+True if @var{x} is less than or equal to @var{y}.
+
+@item @var{x} > @var{y}
+True if @var{x} is greater than @var{y}.
+
+@item @var{x} >= @var{y}
+True if @var{x} is greater than or equal to @var{y}.
+
+@item @var{x} == @var{y}
+True if @var{x} is equal to @var{y}.
+
+@item @var{x} != @var{y}
+True if @var{x} is not equal to @var{y}.
+
+@item @var{x} ~ @var{y}
+True if the string @var{x} matches the regexp denoted by @var{y}.
+
+@item @var{x} !~ @var{y}
+True if the string @var{x} does not match the regexp denoted by @var{y}.
+
+@item @var{subscript} in @var{array}
+True if the array @var{array} has an element with the subscript @var{subscript}.
+@end table
+@c @end cartouche
+
+Comparison expressions have the value one if true and zero if false.
+
+When comparing operands of mixed types, numeric operands are converted
+to strings using the value of @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+Strings are compared
+by comparing the first character of each, then the second character of each,
+and so on. Thus @code{"10"} is less than @code{"9"}. If there are two
+strings where one is a prefix of the other, the shorter string is less than
+the longer one. Thus @code{"abc"} is less than @code{"abcd"}.
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+It is very easy to accidentally mistype the @samp{==} operator, and
+leave off one of the @samp{=}s. The result is still valid @code{awk}
+code, but the program will not do what you mean:
+
+@example
+if (a = b) # oops! should be a == b
+ @dots{}
+else
+ @dots{}
+@end example
+
+@noindent
+Unless @code{b} happens to be zero or the null string, the @code{if}
+part of the test will always succeed. Because the operators are
+so similar, this kind of error is very difficult to spot when
+scanning the source code.
+
+Here are some sample expressions, how @code{gawk} compares them, and what
+the result of the comparison is.
+
+@table @code
+@item 1.5 <= 2.0
+numeric comparison (true)
+
+@item "abc" >= "xyz"
+string comparison (false)
+
+@item 1.5 != " +2"
+string comparison (true)
+
+@item "1e2" < "3"
+string comparison (true)
+
+@item a = 2; b = "2"
+@itemx a == b
+string comparison (true)
+
+@item a = 2; b = " +2"
+@itemx a == b
+string comparison (false)
+@end table
+
+In this example,
+
+@example
+@group
+$ echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}'
+@print{} false
+@end group
+@end example
+
+@noindent
+the result is @samp{false} since both @code{$1} and @code{$2} are numeric
+strings and thus both have the @var{strnum} attribute,
+dictating a numeric comparison.
+
+The purpose of the comparison rules and the use of numeric strings is
+to attempt to produce the behavior that is ``least surprising,'' while
+still ``doing the right thing.''
+
+@cindex comparisons, string vs. regexp
+@cindex string comparison vs. regexp comparison
+@cindex regexp comparison vs. string comparison
+String comparisons and regular expression comparisons are very different.
+For example,
+
+@example
+x == "foo"
+@end example
+
+@noindent
+has the value of one, or is true, if the variable @code{x}
+is precisely @samp{foo}. By contrast,
+
+@example
+x ~ /foo/
+@end example
+
+@noindent
+has the value one if @code{x} contains @samp{foo}, such as
+@code{"Oh, what a fool am I!"}.
+
+The right-hand operand of the @samp{~} and @samp{!~} operators may be
+either a regexp constant (@code{/@dots{}/}), or an ordinary
+expression, in which case the value of the expression as a string is used as a
+dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}; also
+@pxref{Computed Regexps, ,Using Dynamic Regexps}).
+
+@cindex regexp as expression
+In recent implementations of @code{awk}, a constant regular
+expression in slashes by itself is also an expression. The regexp
+@code{/@var{regexp}/} is an abbreviation for this comparison expression:
+
+@example
+$0 ~ /@var{regexp}/
+@end example
+
+One special place where @code{/foo/} is @emph{not} an abbreviation for
+@samp{$0 ~ /foo/} is when it is the right-hand operand of @samp{~} or
+@samp{!~}!
+@xref{Using Constant Regexps, ,Using Regular Expression Constants},
+where this is discussed in more detail.
+
+@c This paragraph has been here since day 1, and has always bothered
+@c me, especially since the expression doesn't really make a lot of
+@c sense. So, just take it out.
+@ignore
+In some contexts it may be necessary to write parentheses around the
+regexp to avoid confusing the @code{gawk} parser. For example,
+@samp{(/x/ - /y/) > threshold} is not allowed, but @samp{((/x/) - (/y/))
+> threshold} parses properly.
+@end ignore
+
+@node Boolean Ops, Conditional Exp, Typing and Comparison, Expressions
+@section Boolean Expressions
+@cindex expression, boolean
+@cindex boolean expressions
+@cindex operators, boolean
+@cindex boolean operators
+@cindex logical operations
+@cindex operations, logical
+@cindex short-circuit operators
+@cindex operators, short-circuit
+@cindex and operator
+@cindex or operator
+@cindex not operator
+@cindex @code{&&} operator
+@cindex @code{||} operator
+@cindex @code{!} operator
+
+A @dfn{boolean expression} is a combination of comparison expressions or
+matching expressions, using the boolean operators ``or''
+(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with
+parentheses to control nesting. The truth value of the boolean expression is
+computed by combining the truth values of the component expressions.
+Boolean expressions are also referred to as @dfn{logical expressions}.
+The terms are equivalent.
+
+Boolean expressions can be used wherever comparison and matching
+expressions can be used. They can be used in @code{if}, @code{while},
+@code{do} and @code{for} statements
+(@pxref{Statements, ,Control Statements in Actions}).
+They have numeric values (one if true, zero if false), which come into play
+if the result of the boolean expression is stored in a variable, or
+used in arithmetic.
+
+In addition, every boolean expression is also a valid pattern, so
+you can use one as a pattern to control the execution of rules.
+
+Here are descriptions of the three boolean operators, with examples.
+
+@c @cartouche
+@table @code
+@item @var{boolean1} && @var{boolean2}
+True if both @var{boolean1} and @var{boolean2} are true. For example,
+the following statement prints the current input record if it contains
+both @samp{2400} and @samp{foo}.
+
+@example
+if ($0 ~ /2400/ && $0 ~ /foo/) print
+@end example
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is true. This can make a difference when @var{boolean2} contains
+expressions that have side effects: in the case of @samp{$0 ~ /foo/ &&
+($2 == bar++)}, the variable @code{bar} is not incremented if there is
+no @samp{foo} in the record.
+
+@item @var{boolean1} || @var{boolean2}
+True if at least one of @var{boolean1} or @var{boolean2} is true.
+For example, the following statement prints all records in the input
+that contain @emph{either} @samp{2400} or
+@samp{foo}, or both.
+
+@example
+if ($0 ~ /2400/ || $0 ~ /foo/) print
+@end example
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is false. This can make a difference when @var{boolean2} contains
+expressions that have side effects.
+
+@item ! @var{boolean}
+True if @var{boolean} is false. For example, the following program prints
+all records in the input file @file{BBS-list} that do @emph{not} contain the
+string @samp{foo}.
+
+@c A better example would be `if (! (subscript in array)) ...' but we
+@c haven't done anything with arrays or `in' yet. Sigh.
+@example
+awk '@{ if (! ($0 ~ /foo/)) print @}' BBS-list
+@end example
+@end table
+@c @end cartouche
+
+The @samp{&&} and @samp{||} operators are called @dfn{short-circuit}
+operators because of the way they work. Evaluation of the full expression
+is ``short-circuited'' if the result can be determined part way through
+its evaluation.
+
+@cindex line continuation
+You can continue a statement that uses @samp{&&} or @samp{||} simply
+by putting a newline after them. But you cannot put a newline in front
+of either of these operators without using backslash continuation
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
+
+The actual value of an expression using the @samp{!} operator will be
+either one or zero, depending upon the truth value of the expression it
+is applied to.
+
+The @samp{!} operator is often useful for changing the sense of a flag
+variable from false to true and back again. For example, the following
+program is one way to print lines in between special bracketing lines:
+
+@example
+$1 == "START" @{ interested = ! interested @}
+interested == 1 @{ print @}
+$1 == "END" @{ interested = ! interested @}
+@end example
+
+@noindent
+The variable @code{interested}, like all @code{awk} variables, starts
+out initialized to zero, which is also false. When a line is seen whose
+first field is @samp{START}, the value of @code{interested} is toggled
+to true, using @samp{!}. The next rule prints lines as long as
+@code{interested} is true. When a line is seen whose first field is
+@samp{END}, @code{interested} is toggled back to false.
+@ignore
+We should discuss using `next' in the two rules that toggle the
+variable, to avoid printing the bracketing lines, but that's more
+distraction than really needed.
+@end ignore
+
+@node Conditional Exp, Function Calls, Boolean Ops, Expressions
+@section Conditional Expressions
+@cindex conditional expression
+@cindex expression, conditional
+
+A @dfn{conditional expression} is a special kind of expression with
+three operands. It allows you to use one expression's value to select
+one of two other expressions.
+
+The conditional expression is the same as in the C language:
+
+@example
+@var{selector} ? @var{if-true-exp} : @var{if-false-exp}
+@end example
+
+@noindent
+There are three subexpressions. The first, @var{selector}, is always
+computed first. If it is ``true'' (not zero and not null) then
+@var{if-true-exp} is computed next and its value becomes the value of
+the whole expression. Otherwise, @var{if-false-exp} is computed next
+and its value becomes the value of the whole expression.
+
+For example, this expression produces the absolute value of @code{x}:
+
+@example
+x > 0 ? x : -x
+@end example
+
+Each time the conditional expression is computed, exactly one of
+@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored.
+This is important when the expressions contain side effects. For example,
+this conditional expression examines element @code{i} of either array
+@code{a} or array @code{b}, and increments @code{i}.
+
+@example
+x == y ? a[i++] : b[i++]
+@end example
+
+@noindent
+This is guaranteed to increment @code{i} exactly once, because each time
+only one of the two increment expressions is executed,
+and the other is not.
+@xref{Arrays, ,Arrays in @code{awk}},
+for more information about arrays.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex line continuation
+As a minor @code{gawk} extension,
+you can continue a statement that uses @samp{?:} simply
+by putting a newline after either character.
+However, you cannot put a newline in front
+of either character without using backslash continuation
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
+
+@node Function Calls, Precedence, Conditional Exp, Expressions
+@section Function Calls
+@cindex function call
+@cindex calling a function
+
+A @dfn{function} is a name for a particular calculation. Because it has
+a name, you can ask for it by name at any point in the program. For
+example, the function @code{sqrt} computes the square root of a number.
+
+A fixed set of functions are @dfn{built-in}, which means they are
+available in every @code{awk} program. The @code{sqrt} function is one
+of these. @xref{Built-in, ,Built-in Functions}, for a list of built-in
+functions and their descriptions. In addition, you can define your own
+functions for use in your program.
+@xref{User-defined, ,User-defined Functions}, for how to do this.
+
+@cindex arguments in function call
+The way to use a function is with a @dfn{function call} expression,
+which consists of the function name followed immediately by a list of
+@dfn{arguments} in parentheses. The arguments are expressions which
+provide the raw materials for the function's calculations.
+When there is more than one argument, they are separated by commas. If
+there are no arguments, write just @samp{()} after the function name.
+Here are some examples:
+
+@example
+sqrt(x^2 + y^2) @i{one argument}
+atan2(y, x) @i{two arguments}
+rand() @i{no arguments}
+@end example
+
+@strong{Do not put any space between the function name and the
+open-parenthesis!} A user-defined function name looks just like the name of
+a variable, and space would make the expression look like concatenation
+of a variable with an expression inside parentheses. Space before the
+parenthesis is harmless with built-in functions, but, to avoid mistakes
+with user-defined functions, it is best not to get into the habit of
+putting space there.
+
+Each function expects a particular number of arguments. For example, the
+@code{sqrt} function must be called with a single argument, the number
+to take the square root of:
+
+@example
+sqrt(@var{argument})
+@end example
+
+Some of the built-in functions allow you to omit the final argument.
+If you do so, they use a reasonable default.
+@xref{Built-in, ,Built-in Functions}, for full details. If arguments
+are omitted in calls to user-defined functions, then those arguments are
+treated as local variables, initialized to the empty string
+(@pxref{User-defined, ,User-defined Functions}).
+
+Like every other expression, the function call has a value, which is
+computed by the function based on the arguments you give it. In this
+example, the value of @samp{sqrt(@var{argument})} is the square root of
+@var{argument}. A function can also have side effects, such as assigning
+values to certain variables or doing I/O.
+
+Here is a command to read numbers, one number per line, and print the
+square root of each one:
+
+@example
+@group
+$ awk '@{ print "The square root of", $1, "is", sqrt($1) @}'
+1
+@print{} The square root of 1 is 1
+3
+@print{} The square root of 3 is 1.73205
+5
+@print{} The square root of 5 is 2.23607
+@kbd{Control-d}
+@end group
+@end example
+
+@node Precedence, , Function Calls, Expressions
+@section Operator Precedence (How Operators Nest)
+@cindex precedence
+@cindex operator precedence
+
+@dfn{Operator precedence} determines how operators are grouped, when
+different operators appear close by in one expression. For example,
+@samp{*} has higher precedence than @samp{+}; thus, @samp{a + b * c}
+means to multiply @code{b} and @code{c}, and then add @code{a} to the
+product (i.e.@: @samp{a + (b * c)}).
+
+You can overrule the precedence of the operators by using parentheses.
+You can think of the precedence rules as saying where the
+parentheses are assumed to be if you do not write parentheses yourself. In
+fact, it is wise to always use parentheses whenever you have an unusual
+combination of operators, because other people who read the program may
+not remember what the precedence is in this case. You might forget,
+too; then you could make a mistake. Explicit parentheses will help prevent
+any such mistake.
+
+When operators of equal precedence are used together, the leftmost
+operator groups first, except for the assignment, conditional and
+exponentiation operators, which group in the opposite order.
+Thus, @samp{a - b + c} groups as @samp{(a - b) + c}, and
+@samp{a = b = c} groups as @samp{a = (b = c)}.
+
+The precedence of prefix unary operators does not matter as long as only
+unary operators are involved, because there is only one way to interpret
+them---innermost first. Thus, @samp{$++i} means @samp{$(++i)} and
+@samp{++$x} means @samp{++($x)}. However, when another operator follows
+the operand, then the precedence of the unary operators can matter.
+Thus, @samp{$x^2} means @samp{($x)^2}, but @samp{-x^2} means
+@samp{-(x^2)}, because @samp{-} has lower precedence than @samp{^}
+while @samp{$} has higher precedence.
+
+Here is a table of @code{awk}'s operators, in order from highest
+precedence to lowest:
+
+@c use @code in the items, looks better in TeX w/o all the quotes
+@table @code
+@item (@dots{})
+Grouping.
+
+@item $
+Field.
+
+@item ++ --
+Increment, decrement.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item ^ **
+Exponentiation. These operators group right-to-left.
+(The @samp{**} operator is not specified by POSIX.)
+
+@item + - !
+Unary plus, minus, logical ``not''.
+
+@item * / %
+Multiplication, division, modulus.
+
+@item + -
+Addition, subtraction.
+
+@item @r{Concatenation}
+No special token is used to indicate concatenation.
+The operands are simply written side by side.
+
+@item < <= == !=
+@itemx > >= >> |
+Relational, and redirection.
+The relational operators and the redirections have the same precedence
+level. Characters such as @samp{>} serve both as relationals and as
+redirections; the context distinguishes between the two meanings.
+
+Note that the I/O redirection operators in @code{print} and @code{printf}
+statements belong to the statement level, not to expressions. The
+redirection does not produce an expression which could be the operand of
+another operator. As a result, it does not make sense to use a
+redirection operator near another operator of lower precedence, without
+parentheses. Such combinations, for example @samp{print foo > a ? b : c},
+result in syntax errors.
+The correct way to write this statement is @samp{print foo > (a ? b : c)}.
+
+@item ~ !~
+Matching, non-matching.
+
+@item in
+Array membership.
+
+@item &&
+Logical ``and''.
+
+@item ||
+Logical ``or''.
+
+@item ?:
+Conditional. This operator groups right-to-left.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item = += -= *=
+@itemx /= %= ^= **=
+Assignment. These operators group right-to-left.
+(The @samp{**=} operator is not specified by POSIX.)
+@end table
+
+@node Patterns and Actions, Statements, Expressions, Top
+@chapter Patterns and Actions
+@cindex pattern, definition of
+
+As you have already seen, each @code{awk} rule consists of
+a pattern with an associated action. This chapter describes how
+you build patterns and actions.
+
+@menu
+* Pattern Overview:: What goes into a pattern.
+* Action Overview:: What goes into an action.
+@end menu
+
+@node Pattern Overview, Action Overview, Patterns and Actions, Patterns and Actions
+@section Pattern Elements
+
+Patterns in @code{awk} control the execution of rules: a rule is
+executed when its pattern matches the current input record. This
+section explains all about how to write patterns.
+
+@menu
+* Kinds of Patterns:: A list of all kinds of patterns.
+* Regexp Patterns:: Using regexps as patterns.
+* Expression Patterns:: Any expression can be used as a pattern.
+* Ranges:: Pairs of patterns specify record ranges.
+* BEGIN/END:: Specifying initialization and cleanup rules.
+* Empty:: The empty pattern, which matches every record.
+@end menu
+
+@node Kinds of Patterns, Regexp Patterns, Pattern Overview, Pattern Overview
+@subsection Kinds of Patterns
+@cindex patterns, types of
+
+Here is a summary of the types of patterns supported in @code{awk}.
+
+@table @code
+@item /@var{regular expression}/
+A regular expression as a pattern. It matches when the text of the
+input record fits the regular expression.
+(@xref{Regexp, ,Regular Expressions}.)
+
+@item @var{expression}
+A single expression. It matches when its value
+is non-zero (if a number) or non-null (if a string).
+(@xref{Expression Patterns, ,Expressions as Patterns}.)
+
+@item @var{pat1}, @var{pat2}
+A pair of patterns separated by a comma, specifying a range of records.
+The range includes both the initial record that matches @var{pat1}, and
+the final record that matches @var{pat2}.
+(@xref{Ranges, ,Specifying Record Ranges with Patterns}.)
+
+@item BEGIN
+@itemx END
+Special patterns for you to supply start-up or clean-up actions for your
+@code{awk} program.
+(@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.)
+
+@item @var{empty}
+The empty pattern matches every input record.
+(@xref{Empty, ,The Empty Pattern}.)
+@end table
+
+@node Regexp Patterns, Expression Patterns, Kinds of Patterns, Pattern Overview
+@subsection Regular Expressions as Patterns
+
+We have been using regular expressions as patterns since our early examples.
+This kind of pattern is simply a regexp constant in the pattern part of
+a rule. Its meaning is @samp{$0 ~ /@var{pattern}/}.
+The pattern matches when the input record matches the regexp.
+For example:
+
+@example
+/foo|bar|baz/ @{ buzzwords++ @}
+END @{ print buzzwords, "buzzwords seen" @}
+@end example
+
+@node Expression Patterns, Ranges, Regexp Patterns, Pattern Overview
+@subsection Expressions as Patterns
+
+Any @code{awk} expression is valid as an @code{awk} pattern.
+The pattern matches if the expression's value is non-zero (if a
+number) or non-null (if a string).
+
+The expression is reevaluated each time the rule is tested against a new
+input record. If the expression uses fields such as @code{$1}, the
+value depends directly on the new input record's text; otherwise, it
+depends only on what has happened so far in the execution of the
+@code{awk} program, but that may still be useful.
+
+A very common kind of expression used as a pattern is the comparison
+expression, using the comparison operators described in
+@ref{Typing and Comparison, ,Variable Typing and Comparison Expressions}.
+
+Regexp matching and non-matching are also very common expressions.
+The left operand of the @samp{~} and @samp{!~} operators is a string.
+The right operand is either a constant regular expression enclosed in
+slashes (@code{/@var{regexp}/}), or any expression, whose string value
+is used as a dynamic regular expression
+(@pxref{Computed Regexps, , Using Dynamic Regexps}).
+
+The following example prints the second field of each input record
+whose first field is precisely @samp{foo}.
+
+@example
+$ awk '$1 == "foo" @{ print $2 @}' BBS-list
+@end example
+
+@noindent
+(There is no output, since there is no BBS site named ``foo''.)
+Contrast this with the following regular expression match, which would
+accept any record with a first field that contains @samp{foo}:
+
+@example
+@group
+$ awk '$1 ~ /foo/ @{ print $2 @}' BBS-list
+@print{} 555-1234
+@print{} 555-6699
+@print{} 555-6480
+@print{} 555-2127
+@end group
+@end example
+
+Boolean expressions are also commonly used as patterns.
+Whether the pattern
+matches an input record depends on whether its subexpressions match.
+
+For example, the following command prints all records in
+@file{BBS-list} that contain both @samp{2400} and @samp{foo}.
+
+@example
+$ awk '/2400/ && /foo/' BBS-list
+@print{} fooey 555-1234 2400/1200/300 B
+@end example
+
+The following command prints all records in
+@file{BBS-list} that contain @emph{either} @samp{2400} or @samp{foo}, or
+both.
+
+@example
+@group
+$ awk '/2400/ || /foo/' BBS-list
+@print{} alpo-net 555-3412 2400/1200/300 A
+@print{} bites 555-1675 2400/1200/300 A
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sdace 555-3430 2400/1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+The following command prints all records in
+@file{BBS-list} that do @emph{not} contain the string @samp{foo}.
+
+@example
+@group
+$ awk '! /foo/' BBS-list
+@print{} aardvark 555-5553 1200/300 B
+@print{} alpo-net 555-3412 2400/1200/300 A
+@print{} barfly 555-7685 1200/300 A
+@print{} bites 555-1675 2400/1200/300 A
+@print{} camelot 555-0542 300 C
+@print{} core 555-2912 1200/300 C
+@print{} sdace 555-3430 2400/1200/300 A
+@end group
+@end example
+
+The subexpressions of a boolean operator in a pattern can be constant regular
+expressions, comparisons, or any other @code{awk} expressions. Range
+patterns are not expressions, so they cannot appear inside boolean
+patterns. Likewise, the special patterns @code{BEGIN} and @code{END},
+which never match any input record, are not expressions and cannot
+appear inside boolean patterns.
+
+A regexp constant as a pattern is also a special case of an expression
+pattern. @code{/foo/} as an expression has the value one if @samp{foo}
+appears in the current input record; thus, as a pattern, @code{/foo/}
+matches any record containing @samp{foo}.
+
+@node Ranges, BEGIN/END, Expression Patterns, Pattern Overview
+@subsection Specifying Record Ranges with Patterns
+
+@cindex range pattern
+@cindex pattern, range
+@cindex matching ranges of lines
+A @dfn{range pattern} is made of two patterns separated by a comma, of
+the form @samp{@var{begpat}, @var{endpat}}. It matches ranges of
+consecutive input records. The first pattern, @var{begpat}, controls
+where the range begins, and the second one, @var{endpat}, controls where
+it ends. For example,
+
+@example
+awk '$1 == "on", $1 == "off"'
+@end example
+
+@noindent
+prints every record between @samp{on}/@samp{off} pairs, inclusive.
+
+A range pattern starts out by matching @var{begpat}
+against every input record; when a record matches @var{begpat}, the
+range pattern becomes @dfn{turned on}. The range pattern matches this
+record. As long as it stays turned on, it automatically matches every
+input record read. It also matches @var{endpat} against
+every input record; when that succeeds, the range pattern is turned
+off again for the following record. Then it goes back to checking
+@var{begpat} against each record.
+
+The record that turns on the range pattern and the one that turns it
+off both match the range pattern. If you don't want to operate on
+these records, you can write @code{if} statements in the rule's action
+to distinguish them from the records you are interested in.
+
+It is possible for a range pattern to be turned both on and off by the same
+record, if the record satisfies both conditions. Then the action is
+executed for just that record.
+
+For example, suppose you have text between two identical markers (say
+the @samp{%} symbol) that you wish to ignore. You might try to
+combine a range pattern that describes the delimited text with the
+@code{next} statement
+(not discussed yet, @pxref{Next Statement, , The @code{next} Statement}),
+which causes @code{awk} to skip any further processing of the current
+record and start over again with the next input record. Such a program
+would look like this:
+
+@example
+/^%$/,/^%$/ @{ next @}
+ @{ print @}
+@end example
+
+@noindent
+@cindex skipping lines between markers
+This program fails because the range pattern is both turned on and turned off
+by the first line with just a @samp{%} on it. To accomplish this task, you
+must write the program this way, using a flag:
+
+@example
+/^%$/ @{ skip = ! skip; next @}
+skip == 1 @{ next @} # skip lines with `skip' set
+@{ print @} # print all the other lines
+@end example
+
+Note that in a range pattern, the @samp{,} has the lowest precedence
+(is evaluated last) of all the operators. Thus, for example, the
+following program attempts to combine a range pattern with another,
+simpler test.
+
+@example
+echo Yes | awk '/1/,/2/ || /Yes/'
+@end example
+
+The author of this program intended it to mean @samp{(/1/,/2/) || /Yes/}.
+However, @code{awk} interprets this as @samp{/1/, (/2/ || /Yes/)}.
+This cannot be changed or worked around; range patterns do not combine
+with other patterns.
+
+@node BEGIN/END, Empty, Ranges, Pattern Overview
+@subsection The @code{BEGIN} and @code{END} Special Patterns
+
+@cindex @code{BEGIN} special pattern
+@cindex pattern, @code{BEGIN}
+@cindex @code{END} special pattern
+@cindex pattern, @code{END}
+@code{BEGIN} and @code{END} are special patterns. They are not used to
+match input records. Rather, they supply start-up or
+clean-up actions for your @code{awk} script.
+
+@menu
+* Using BEGIN/END:: How and why to use BEGIN/END rules.
+* I/O And BEGIN/END:: I/O issues in BEGIN/END rules.
+@end menu
+
+@node Using BEGIN/END, I/O And BEGIN/END, BEGIN/END, BEGIN/END
+@subsubsection Startup and Cleanup Actions
+
+A @code{BEGIN} rule is executed, once, before the first input record
+has been read. An @code{END} rule is executed, once, after all the
+input has been read. For example:
+
+@example
+@group
+$ awk '
+> BEGIN @{ print "Analysis of \"foo\"" @}
+> /foo/ @{ ++n @}
+> END @{ print "\"foo\" appears " n " times." @}' BBS-list
+@print{} Analysis of "foo"
+@print{} "foo" appears 4 times.
+@end group
+@end example
+
+This program finds the number of records in the input file @file{BBS-list}
+that contain the string @samp{foo}. The @code{BEGIN} rule prints a title
+for the report. There is no need to use the @code{BEGIN} rule to
+initialize the counter @code{n} to zero, as @code{awk} does this
+automatically (@pxref{Variables}).
+
+The second rule increments the variable @code{n} every time a
+record containing the pattern @samp{foo} is read. The @code{END} rule
+prints the value of @code{n} at the end of the run.
+
+The special patterns @code{BEGIN} and @code{END} cannot be used in ranges
+or with boolean operators (indeed, they cannot be used with any operators).
+
+An @code{awk} program may have multiple @code{BEGIN} and/or @code{END}
+rules. They are executed in the order they appear, all the @code{BEGIN}
+rules at start-up and all the @code{END} rules at termination.
+@code{BEGIN} and @code{END} rules may be intermixed with other rules.
+This feature was added in the 1987 version of @code{awk}, and is included
+in the POSIX standard. The original (1978) version of @code{awk}
+required you to put the @code{BEGIN} rule at the beginning of the
+program, and the @code{END} rule at the end, and only allowed one of
+each. This is no longer required, but it is a good idea in terms of
+program organization and readability.
+
+Multiple @code{BEGIN} and @code{END} rules are useful for writing
+library functions, since each library file can have its own @code{BEGIN} and/or
+@code{END} rule to do its own initialization and/or cleanup. Note that
+the order in which library functions are named on the command line
+controls the order in which their @code{BEGIN} and @code{END} rules are
+executed. Therefore you have to be careful to write such rules in
+library files so that the order in which they are executed doesn't matter.
+@xref{Options, ,Command Line Options}, for more information on
+using library functions.
+@xref{Library Functions, ,A Library of @code{awk} Functions},
+for a number of useful library functions.
+
+@cindex dark corner
+If an @code{awk} program only has a @code{BEGIN} rule, and no other
+rules, then the program exits after the @code{BEGIN} rule has been run.
+(The original version of @code{awk} used to keep reading and ignoring input
+until end of file was seen.) However, if an @code{END} rule exists,
+then the input will be read, even if there are no other rules in
+the program. This is necessary in case the @code{END} rule checks the
+@code{FNR} and @code{NR} variables (d.c.).
+
+@code{BEGIN} and @code{END} rules must have actions; there is no default
+action for these rules since there is no current record when they run.
+
+@node I/O And BEGIN/END, , Using BEGIN/END, BEGIN/END
+@subsubsection Input/Output from @code{BEGIN} and @code{END} Rules
+
+@cindex I/O from @code{BEGIN} and @code{END}
+There are several (sometimes subtle) issues involved when doing I/O
+from a @code{BEGIN} or @code{END} rule.
+
+The first has to do with the value of @code{$0} in a @code{BEGIN}
+rule. Since @code{BEGIN} rules are executed before any input is read,
+there simply is no input record, and therefore no fields, when
+executing @code{BEGIN} rules. References to @code{$0} and the fields
+yield a null string or zero, depending upon the context. One way
+to give @code{$0} a real value is to execute a @code{getline} command
+without a variable (@pxref{Getline, ,Explicit Input with @code{getline}}).
+Another way is to simply assign a value to it.
+
+@cindex differences between @code{gawk} and @code{awk}
+The second point is similar to the first, but from the other direction.
+Inside an @code{END} rule, what is the value of @code{$0} and @code{NF}?
+Traditionally, due largely to implementation issues, @code{$0} and
+@code{NF} were @emph{undefined} inside an @code{END} rule.
+The POSIX standard specified that @code{NF} was available in an @code{END}
+rule, containing the number of fields from the last input record.
+Due most probably to an oversight, the standard does not say that @code{$0}
+is also preserved, although logically one would think that it should be.
+In fact, @code{gawk} does preserve the value of @code{$0} for use in
+@code{END} rules. Be aware, however, that Unix @code{awk}, and possibly
+other implementations, do not.
+
+The third point follows from the first two. What is the meaning of
+@samp{print} inside a @code{BEGIN} or @code{END} rule? The meaning is
+the same as always, @samp{print $0}. If @code{$0} is the null string,
+then this prints an empty line. Many long-time @code{awk} programmers
+use @samp{print} in @code{BEGIN} and @code{END} rules, to mean
+@samp{@w{print ""}}, relying on @code{$0} being null. While you might
+generally get away with this in @code{BEGIN} rules, in @code{gawk} at
+least, it is a very bad idea in @code{END} rules. It is also poor
+style, since if you want an empty line in the output, you
+should say so explicitly in your program.
+
+@node Empty, , BEGIN/END, Pattern Overview
+@subsection The Empty Pattern
+
+@cindex empty pattern
+@cindex pattern, empty
+An empty (i.e.@: non-existent) pattern is considered to match @emph{every}
+input record. For example, the program:
+
+@example
+awk '@{ print $1 @}' BBS-list
+@end example
+
+@noindent
+prints the first field of every record.
+
+@node Action Overview, , Pattern Overview, Patterns and Actions
+@section Overview of Actions
+@cindex action, definition of
+@cindex curly braces
+@cindex action, curly braces
+@cindex action, separating statements
+
+An @code{awk} program or script consists of a series of
+rules and function definitions, interspersed. (Functions are
+described later. @xref{User-defined, ,User-defined Functions}.)
+
+A rule contains a pattern and an action, either of which (but not
+both) may be
+omitted. The purpose of the @dfn{action} is to tell @code{awk} what to do
+once a match for the pattern is found. Thus, in outline, an @code{awk}
+program generally looks like this:
+
+@example
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@dots{}
+function @var{name}(@var{args}) @{ @dots{} @}
+@dots{}
+@end example
+
+An action consists of zero or more @code{awk} @dfn{statements}, enclosed
+in curly braces (@samp{@{} and @samp{@}}). Each statement specifies one
+thing to be done. The statements are separated by newlines or
+semicolons.
+
+The curly braces around an action must be used even if the action
+contains only one statement, or even if it contains no statements at
+all. However, if you omit the action entirely, omit the curly braces as
+well. An omitted action is equivalent to @samp{@{ print $0 @}}.
+
+@example
+/foo/ @{ @} # match foo, do nothing - empty action
+/foo/ # match foo, print the record - omitted action
+@end example
+
+Here are the kinds of statements supported in @code{awk}:
+
+@itemize @bullet
+@item
+Expressions, which can call functions or assign values to variables
+(@pxref{Expressions}). Executing
+this kind of statement simply computes the value of the expression.
+This is useful when the expression has side effects
+(@pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+Control statements, which specify the control flow of @code{awk}
+programs. The @code{awk} language gives you C-like constructs
+(@code{if}, @code{for}, @code{while}, and @code{do}) as well as a few
+special ones (@pxref{Statements, ,Control Statements in Actions}).
+
+@item
+Compound statements, which consist of one or more statements enclosed in
+curly braces. A compound statement is used in order to put several
+statements together in the body of an @code{if}, @code{while}, @code{do}
+or @code{for} statement.
+
+@item
+Input statements, using the @code{getline} command
+(@pxref{Getline, ,Explicit Input with @code{getline}}), the @code{next}
+statement (@pxref{Next Statement, ,The @code{next} Statement}),
+and the @code{nextfile} statement
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+
+@item
+Output statements, @code{print} and @code{printf}.
+@xref{Printing, ,Printing Output}.
+
+@item
+Deletion statements, for deleting array elements.
+@xref{Delete, ,The @code{delete} Statement}.
+@end itemize
+
+@iftex
+The next chapter covers control statements in detail.
+@end iftex
+
+@node Statements, Built-in Variables, Patterns and Actions, Top
+@chapter Control Statements in Actions
+@cindex control statement
+
+@dfn{Control statements} such as @code{if}, @code{while}, and so on
+control the flow of execution in @code{awk} programs. Most of the
+control statements in @code{awk} are patterned on similar statements in
+C.
+
+All the control statements start with special keywords such as @code{if}
+and @code{while}, to distinguish them from simple expressions.
+
+@cindex compound statement
+@cindex statement, compound
+Many control statements contain other statements; for example, the
+@code{if} statement contains another statement which may or may not be
+executed. The contained statement is called the @dfn{body}. If you
+want to include more than one statement in the body, group them into a
+single @dfn{compound statement} with curly braces, separating them with
+newlines or semicolons.
+
+@menu
+* If Statement:: Conditionally execute some @code{awk}
+ statements.
+* While Statement:: Loop until some condition is satisfied.
+* Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+* For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+* Break Statement:: Immediately exit the innermost enclosing loop.
+* Continue Statement:: Skip to the end of the innermost enclosing
+ loop.
+* Next Statement:: Stop processing the current input record.
+* Nextfile Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @code{awk}.
+@end menu
+
+@node If Statement, While Statement, Statements, Statements
+@section The @code{if}-@code{else} Statement
+
+@cindex @code{if}-@code{else} statement
+The @code{if}-@code{else} statement is @code{awk}'s decision-making
+statement. It looks like this:
+
+@example
+if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
+@end example
+
+@noindent
+The @var{condition} is an expression that controls what the rest of the
+statement will do. If @var{condition} is true, @var{then-body} is
+executed; otherwise, @var{else-body} is executed.
+The @code{else} part of the statement is
+optional. The condition is considered false if its value is zero or
+the null string, and true otherwise.
+
+Here is an example:
+
+@example
+if (x % 2 == 0)
+ print "x is even"
+else
+ print "x is odd"
+@end example
+
+In this example, if the expression @samp{x % 2 == 0} is true (that is,
+the value of @code{x} is evenly divisible by two), then the first @code{print}
+statement is executed, otherwise the second @code{print} statement is
+executed.
+
+If the @code{else} appears on the same line as @var{then-body}, and
+@var{then-body} is not a compound statement (i.e.@: not surrounded by
+curly braces), then a semicolon must separate @var{then-body} from
+@code{else}. To illustrate this, let's rewrite the previous example:
+
+@example
+if (x % 2 == 0) print "x is even"; else
+ print "x is odd"
+@end example
+
+@noindent
+If you forget the @samp{;}, @code{awk} won't be able to interpret the
+statement, and you will get a syntax error.
+
+We would not actually write this example this way, because a human
+reader might fail to see the @code{else} if it were not the first thing
+on its line.
+
+@node While Statement, Do Statement, If Statement, Statements
+@section The @code{while} Statement
+@cindex @code{while} statement
+@cindex loop
+@cindex body of a loop
+
+In programming, a @dfn{loop} means a part of a program that can
+be executed two or more times in succession.
+
+The @code{while} statement is the simplest looping statement in
+@code{awk}. It repeatedly executes a statement as long as a condition is
+true. It looks like this:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@noindent
+Here @var{body} is a statement that we call the @dfn{body} of the loop,
+and @var{condition} is an expression that controls how long the loop
+keeps running.
+
+The first thing the @code{while} statement does is test @var{condition}.
+If @var{condition} is true, it executes the statement @var{body}.
+@ifinfo
+(The @var{condition} is true when the value
+is not zero and not a null string.)
+@end ifinfo
+After @var{body} has been executed,
+@var{condition} is tested again, and if it is still true, @var{body} is
+executed again. This process repeats until @var{condition} is no longer
+true. If @var{condition} is initially false, the body of the loop is
+never executed, and @code{awk} continues with the statement following
+the loop.
+
+This example prints the first three fields of each record, one per line.
+
+@example
+awk '@{ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
+@}' inventory-shipped
+@end example
+
+@noindent
+Here the body of the loop is a compound statement enclosed in braces,
+containing two statements.
+
+The loop works like this: first, the value of @code{i} is set to one.
+Then, the @code{while} tests whether @code{i} is less than or equal to
+three. This is true when @code{i} equals one, so the @code{i}-th
+field is printed. Then the @samp{i++} increments the value of @code{i}
+and the loop repeats. The loop terminates when @code{i} reaches four.
+
+As you can see, a newline is not required between the condition and the
+body; but using one makes the program clearer unless the body is a
+compound statement or is very simple. The newline after the open-brace
+that begins the compound statement is not required either, but the
+program would be harder to read without it.
+
+@node Do Statement, For Statement, While Statement, Statements
+@section The @code{do}-@code{while} Statement
+
+The @code{do} loop is a variation of the @code{while} looping statement.
+The @code{do} loop executes the @var{body} once, and then repeats @var{body}
+as long as @var{condition} is true. It looks like this:
+
+@example
+@group
+do
+ @var{body}
+while (@var{condition})
+@end group
+@end example
+
+Even if @var{condition} is false at the start, @var{body} is executed at
+least once (and only once, unless executing @var{body} makes
+@var{condition} true). Contrast this with the corresponding
+@code{while} statement:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@noindent
+This statement does not execute @var{body} even once if @var{condition}
+is false to begin with.
+
+Here is an example of a @code{do} statement:
+
+@example
+awk '@{ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
+@}'
+@end example
+
+@noindent
+This program prints each input record ten times. It isn't a very
+realistic example, since in this case an ordinary @code{while} would do
+just as well. But this reflects actual experience; there is only
+occasionally a real use for a @code{do} statement.
+
+@node For Statement, Break Statement, Do Statement, Statements
+@section The @code{for} Statement
+@cindex @code{for} statement
+
+The @code{for} statement makes it more convenient to count iterations of a
+loop. The general form of the @code{for} statement looks like this:
+
+@example
+for (@var{initialization}; @var{condition}; @var{increment})
+ @var{body}
+@end example
+
+@noindent
+The @var{initialization}, @var{condition} and @var{increment} parts are
+arbitrary @code{awk} expressions, and @var{body} stands for any
+@code{awk} statement.
+
+The @code{for} statement starts by executing @var{initialization}.
+Then, as long
+as @var{condition} is true, it repeatedly executes @var{body} and then
+@var{increment}. Typically @var{initialization} sets a variable to
+either zero or one, @var{increment} adds one to it, and @var{condition}
+compares it against the desired number of iterations.
+
+Here is an example of a @code{for} statement:
+
+@example
+@group
+awk '@{ for (i = 1; i <= 3; i++)
+ print $i
+@}' inventory-shipped
+@end group
+@end example
+
+@noindent
+This prints the first three fields of each input record, one field per
+line.
+
+You cannot set more than one variable in the
+@var{initialization} part unless you use a multiple assignment statement
+such as @samp{x = y = 0}, which is possible only if all the initial values
+are equal. (But you can initialize additional variables by writing
+their assignments as separate statements preceding the @code{for} loop.)
+
+The same is true of the @var{increment} part; to increment additional
+variables, you must write separate statements at the end of the loop.
+The C compound expression, using C's comma operator, would be useful in
+this context, but it is not supported in @code{awk}.
+
+Most often, @var{increment} is an increment expression, as in the
+example above. But this is not required; it can be any expression
+whatever. For example, this statement prints all the powers of two
+between one and 100:
+
+@example
+for (i = 1; i <= 100; i *= 2)
+ print i
+@end example
+
+Any of the three expressions in the parentheses following the @code{for} may
+be omitted if there is nothing to be done there. Thus, @w{@samp{for (; x
+> 0;)}} is equivalent to @w{@samp{while (x > 0)}}. If the
+@var{condition} is omitted, it is treated as @var{true}, effectively
+yielding an @dfn{infinite loop} (i.e.@: a loop that will never
+terminate).
+
+In most cases, a @code{for} loop is an abbreviation for a @code{while}
+loop, as shown here:
+
+@example
+@var{initialization}
+while (@var{condition}) @{
+ @var{body}
+ @var{increment}
+@}
+@end example
+
+@noindent
+The only exception is when the @code{continue} statement
+(@pxref{Continue Statement, ,The @code{continue} Statement}) is used
+inside the loop; changing a @code{for} statement to a @code{while}
+statement in this way can change the effect of the @code{continue}
+statement inside the loop.
+
+There is an alternate version of the @code{for} loop, for iterating over
+all the indices of an array:
+
+@example
+for (i in array)
+ @var{do something with} array[i]
+@end example
+
+@noindent
+@xref{Scanning an Array, ,Scanning All Elements of an Array},
+for more information on this version of the @code{for} loop.
+
+The @code{awk} language has a @code{for} statement in addition to a
+@code{while} statement because often a @code{for} loop is both less work to
+type and more natural to think of. Counting the number of iterations is
+very common in loops. It can be easier to think of this counting as part
+of looping rather than as something to do inside the loop.
+
+The next section has more complicated examples of @code{for} loops.
+
+@node Break Statement, Continue Statement, For Statement, Statements
+@section The @code{break} Statement
+@cindex @code{break} statement
+@cindex loops, exiting
+
+The @code{break} statement jumps out of the innermost @code{for},
+@code{while}, or @code{do} loop that encloses it. The
+following example finds the smallest divisor of any integer, and also
+identifies prime numbers:
+
+@example
+awk '# find smallest divisor of num
+ @{ num = $1
+ for (div = 2; div*div <= num; div++)
+ if (num % div == 0)
+ break
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num
+ @}'
+@end example
+
+When the remainder is zero in the first @code{if} statement, @code{awk}
+immediately @dfn{breaks out} of the containing @code{for} loop. This means
+that @code{awk} proceeds immediately to the statement following the loop
+and continues processing. (This is very different from the @code{exit}
+statement, which stops the entire @code{awk} program.
+@xref{Exit Statement, ,The @code{exit} Statement}.)
+
+Here is another program equivalent to the previous one. It illustrates how
+the @var{condition} of a @code{for} or @code{while} could just as well be
+replaced with a @code{break} inside an @code{if}:
+
+@example
+@group
+awk '# find smallest divisor of num
+ @{ num = $1
+ for (div = 2; ; div++) @{
+ if (num % div == 0) @{
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ @}
+ if (div*div > num) @{
+ printf "%d is prime\n", num
+ break
+ @}
+ @}
+@}'
+@end group
+@end example
+
+@cindex @code{break}, outside of loops
+@cindex historical features
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@cindex dark corner
+As described above, the @code{break} statement has no meaning when
+used outside the body of a loop. However, although it was never documented,
+historical implementations of @code{awk} have treated the @code{break}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement, ,The @code{next} Statement}).
+Recent versions of Unix @code{awk} no longer allow this usage.
+@code{gawk} will support this use of @code{break} only if @samp{--traditional}
+has been specified on the command line
+(@pxref{Options, ,Command Line Options}).
+Otherwise, it will be treated as an error, since the POSIX standard
+specifies that @code{break} should only be used inside the body of a
+loop (d.c.).
+
+@node Continue Statement, Next Statement, Break Statement, Statements
+@section The @code{continue} Statement
+
+@cindex @code{continue} statement
+The @code{continue} statement, like @code{break}, is used only inside
+@code{for}, @code{while}, and @code{do} loops. It skips
+over the rest of the loop body, causing the next cycle around the loop
+to begin immediately. Contrast this with @code{break}, which jumps out
+of the loop altogether.
+
+@c The point of this program was to illustrate the use of continue with
+@c a while loop. But Karl Berry points out that that is done adequately
+@c below, and that this example is very un-awk-like. So for now, we'll
+@c omit it.
+@ignore
+In Texinfo source files, text that the author wishes to ignore can be
+enclosed between lines that start with @samp{@@ignore} and end with
+@samp{@@end ignore}. Here is a program that strips out lines between
+@samp{@@ignore} and @samp{@@end ignore} pairs.
+
+@example
+BEGIN @{
+ while (getline > 0) @{
+ if (/^@@ignore/)
+ ignoring = 1
+ else if (/^@@end[ \t]+ignore/) @{
+ ignoring = 0
+ continue
+ @}
+ if (ignoring)
+ continue
+ print
+ @}
+@}
+@end example
+
+When an @samp{@@ignore} is seen, the @code{ignoring} flag is set to one (true).
+When @samp{@@end ignore} is seen, the flag is reset to zero (false). As long
+as the flag is true, the input record is not printed, because the
+@code{continue} restarts the @code{while} loop, skipping over the @code{print}
+statement.
+
+@c Exercise!!!
+@c How could this program be written to make better use of the awk language?
+@end ignore
+
+The @code{continue} statement in a @code{for} loop directs @code{awk} to
+skip the rest of the body of the loop, and resume execution with the
+increment-expression of the @code{for} statement. The following program
+illustrates this fact:
+
+@example
+awk 'BEGIN @{
+ for (x = 0; x <= 20; x++) @{
+ if (x == 5)
+ continue
+ printf "%d ", x
+ @}
+ print ""
+@}'
+@end example
+
+@noindent
+This program prints all the numbers from zero to 20, except for five, for
+which the @code{printf} is skipped. Since the increment @samp{x++}
+is not skipped, @code{x} does not remain stuck at five. Contrast the
+@code{for} loop above with this @code{while} loop:
+
+@example
+awk 'BEGIN @{
+ x = 0
+ while (x <= 20) @{
+ if (x == 5)
+ continue
+ printf "%d ", x
+ x++
+ @}
+ print ""
+@}'
+@end example
+
+@noindent
+This program loops forever once @code{x} gets to five.
+
+@cindex @code{continue}, outside of loops
+@cindex historical features
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@cindex dark corner
+As described above, the @code{continue} statement has no meaning when
+used outside the body of a loop. However, although it was never documented,
+historical implementations of @code{awk} have treated the @code{continue}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement, ,The @code{next} Statement}).
+Recent versions of Unix @code{awk} no longer allow this usage.
+@code{gawk} will support this use of @code{continue} only if
+@samp{--traditional} has been specified on the command line
+(@pxref{Options, ,Command Line Options}).
+Otherwise, it will be treated as an error, since the POSIX standard
+specifies that @code{continue} should only be used inside the body of a
+loop (d.c.).
+
+@node Next Statement, Nextfile Statement, Continue Statement, Statements
+@section The @code{next} Statement
+@cindex @code{next} statement
+
+The @code{next} statement forces @code{awk} to immediately stop processing
+the current record and go on to the next record. This means that no
+further rules are executed for the current record. The rest of the
+current rule's action is not executed either.
+
+Contrast this with the effect of the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}). That too causes
+@code{awk} to read the next record immediately, but it does not alter the
+flow of control in any way. So the rest of the current action executes
+with a new input record.
+
+At the highest level, @code{awk} program execution is a loop that reads
+an input record and then tests each rule's pattern against it. If you
+think of this loop as a @code{for} statement whose body contains the
+rules, then the @code{next} statement is analogous to a @code{continue}
+statement: it skips to the end of the body of this implicit loop, and
+executes the increment (which reads another record).
+
+For example, if your @code{awk} program works only on records with four
+fields, and you don't want it to fail when given bad input, you might
+use this rule near the beginning of the program:
+
+@example
+@group
+NF != 4 @{
+ err = sprintf("%s:%d: skipped: NF != 4\n", FILENAME, FNR)
+ print err > "/dev/stderr"
+ next
+@}
+@end group
+@end example
+
+@noindent
+so that the following rules will not see the bad record. The error
+message is redirected to the standard error output stream, as error
+messages should be. @xref{Special Files, ,Special File Names in @code{gawk}}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+According to the POSIX standard, the behavior is undefined if
+the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@code{gawk} will treat it as a syntax error.
+Although POSIX permits it,
+some other @code{awk} implementations don't allow the @code{next}
+statement inside function bodies
+(@pxref{User-defined, ,User-defined Functions}).
+Just as with any other @code{next} statement, a @code{next} inside a
+function body reads the next record and starts processing it with the
+first rule in the program.
+
+If the @code{next} statement causes the end of the input to be reached,
+then the code in any @code{END} rules will be executed.
+@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.
+
+@cindex @code{next}, inside a user-defined function
+@strong{Caution:} Some @code{awk} implementations generate a run-time
+error if you use the @code{next} statement inside a user-defined function
+(@pxref{User-defined, , User-defined Functions}).
+@code{gawk} does not have this problem.
+
+@node Nextfile Statement, Exit Statement, Next Statement, Statements
+@section The @code{nextfile} Statement
+@cindex @code{nextfile} statement
+@cindex differences between @code{gawk} and @code{awk}
+
+@code{gawk} provides the @code{nextfile} statement,
+which is similar to the @code{next} statement.
+However, instead of abandoning processing of the current record, the
+@code{nextfile} statement instructs @code{gawk} to stop processing the
+current data file.
+
+Upon execution of the @code{nextfile} statement, @code{FILENAME} is
+updated to the name of the next data file listed on the command line,
+@code{FNR} is reset to one, @code{ARGIND} is incremented, and processing
+starts over with the first rule in the program. @xref{Built-in Variables}.
+
+If the @code{nextfile} statement causes the end of the input to be reached,
+then the code in any @code{END} rules will be executed.
+@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.
+
+The @code{nextfile} statement is a @code{gawk} extension; it is not
+(currently) available in any other @code{awk} implementation.
+@xref{Nextfile Function, ,Implementing @code{nextfile} as a Function},
+for a user-defined function you can use to simulate the @code{nextfile}
+statement.
+
+The @code{nextfile} statement would be useful if you have many data
+files to process, and you expect that you
+would not want to process every record in every file.
+Normally, in order to move on to
+the next data file, you would have to continue scanning the unwanted
+records. The @code{nextfile} statement accomplishes this much more
+efficiently.
+
+@cindex @code{next file} statement
+@strong{Caution:} Versions of @code{gawk} prior to 3.0 used two
+words (@samp{next file}) for the @code{nextfile} statement. This was
+changed in 3.0 to one word, since the treatment of @samp{file} was
+inconsistent. When it appeared after @code{next}, it was a keyword.
+Otherwise, it was a regular identifier. The old usage is still
+accepted. However, @code{gawk} will generate a warning message, and
+support for @code{next file} will eventually be discontinued in a
+future version of @code{gawk}.
+
+@node Exit Statement, , Nextfile Statement, Statements
+@section The @code{exit} Statement
+
+@cindex @code{exit} statement
+The @code{exit} statement causes @code{awk} to immediately stop
+executing the current rule and to stop processing input; any remaining input
+is ignored. It looks like this:
+
+@example
+exit @r{[}@var{return code}@r{]}
+@end example
+
+If an @code{exit} statement is executed from a @code{BEGIN} rule the
+program stops processing everything immediately. No input records are
+read. However, if an @code{END} rule is present, it is executed
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+
+If @code{exit} is used as part of an @code{END} rule, it causes
+the program to stop immediately.
+
+An @code{exit} statement that is not part
+of a @code{BEGIN} or @code{END} rule stops the execution of any further
+automatic rules for the current record, skips reading any remaining input
+records, and executes
+the @code{END} rule if there is one.
+
+If you do not want the @code{END} rule to do its job in this case, you
+can set a variable to non-zero before the @code{exit} statement, and check
+that variable in the @code{END} rule.
+@xref{Assert Function, ,Assertions},
+for an example that does this.
+
+@cindex dark corner
+If an argument is supplied to @code{exit}, its value is used as the exit
+status code for the @code{awk} process. If no argument is supplied,
+@code{exit} returns status zero (success). In the case where an argument
+is supplied to a first @code{exit} statement, and then @code{exit} is
+called a second time with no argument, the previously supplied exit value
+is used (d.c.).
+
+For example, let's say you've discovered an error condition you really
+don't know how to handle. Conventionally, programs report this by
+exiting with a non-zero status. Your @code{awk} program can do this
+using an @code{exit} statement with a non-zero argument. Here is an
+example:
+
+@example
+@group
+BEGIN @{
+ if (("date" | getline date_now) < 0) @{
+ print "Can't get system date" > "/dev/stderr"
+ exit 1
+ @}
+ print "current date is", date_now
+ close("date")
+@}
+@end group
+@end example
+
+@node Built-in Variables, Arrays, Statements, Top
+@chapter Built-in Variables
+@cindex built-in variables
+
+Most @code{awk} variables are available for you to use for your own
+purposes; they never change except when your program assigns values to
+them, and never affect anything except when your program examines them.
+However, a few variables in @code{awk} have special built-in meanings.
+Some of them @code{awk} examines automatically, so that they enable you
+to tell @code{awk} how to do certain things. Others are set
+automatically by @code{awk}, so that they carry information from the
+internal workings of @code{awk} to your program.
+
+This chapter documents all the built-in variables of @code{gawk}. Most
+of them are also documented in the chapters describing their areas of
+activity.
+
+@menu
+* User-modified:: Built-in variables that you change to control
+ @code{awk}.
+* Auto-set:: Built-in variables where @code{awk} gives you
+ information.
+* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}.
+@end menu
+
+@node User-modified, Auto-set, Built-in Variables, Built-in Variables
+@section Built-in Variables that Control @code{awk}
+@cindex built-in variables, user modifiable
+
+This is an alphabetical list of the variables which you can change to
+control how @code{awk} does certain things. Those variables that are
+specific to @code{gawk} are marked with an asterisk, @samp{*}.
+
+@table @code
+@vindex CONVFMT
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item CONVFMT
+This string controls conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+It works by being passed, in effect, as the first argument to the
+@code{sprintf} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Its default value is @code{"%.6g"}.
+@code{CONVFMT} was introduced by the POSIX standard.
+
+@vindex FIELDWIDTHS
+@item FIELDWIDTHS *
+This is a space-separated list of column widths that tells @code{gawk}
+how to split input with fixed, columnar boundaries. It is an
+experimental feature. Assigning to @code{FIELDWIDTHS}
+overrides the use of @code{FS} for field splitting.
+@xref{Constant Size, ,Reading Fixed-width Data}, for more information.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}), then @code{FIELDWIDTHS}
+has no special meaning, and field splitting operations are done based
+exclusively on the value of @code{FS}.
+
+@vindex FS
+@item FS
+@code{FS} is the input field separator
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+The value is a single-character string or a multi-character regular
+expression that matches the separations between fields in an input
+record. If the value is the null string (@code{""}), then each
+character in the record becomes a separate field.
+
+The default value is @w{@code{" "}}, a string consisting of a single
+space. As a special exception, this value means that any
+sequence of spaces, tabs, and/or newlines is a single separator.@footnote{In
+POSIX @code{awk}, newline does not count as whitespace.} It also causes
+spaces, tabs, and newlines at the beginning and end of a record to be ignored.
+
+You can set the value of @code{FS} on the command line using the
+@samp{-F} option:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+If @code{gawk} is using @code{FIELDWIDTHS} for field-splitting,
+assigning a value to @code{FS} will cause @code{gawk} to return to
+the normal, @code{FS}-based, field splitting. An easy way to do this
+is to simply say @samp{FS = FS}, perhaps with an explanatory comment.
+
+@vindex IGNORECASE
+@item IGNORECASE *
+If @code{IGNORECASE} is non-zero or non-null, then all string comparisons
+and all regular expression matching are case-independent. Thus, regexp
+matching with @samp{~} and @samp{!~}, and the @code{gensub},
+@code{gsub}, @code{index}, @code{match}, @code{split} and @code{sub}
+functions, record termination with @code{RS}, and field splitting with
+@code{FS} all ignore case when doing their particular regexp operations.
+The value of @code{IGNORECASE} does @emph{not} affect array subscripting.
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+then @code{IGNORECASE} has no special meaning, and string
+and regexp operations are always case-sensitive.
+
+@vindex OFMT
+@item OFMT
+This string controls conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for
+printing with the @code{print} statement. It works by being passed, in
+effect, as the first argument to the @code{sprintf} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Its default value is @code{"%.6g"}. Earlier versions of @code{awk}
+also used @code{OFMT} to specify the format for converting numbers to
+strings in general expressions; this is now done by @code{CONVFMT}.
+
+@vindex OFS
+@item OFS
+This is the output field separator (@pxref{Output Separators}). It is
+output between the fields output by a @code{print} statement. Its
+default value is @w{@code{" "}}, a string consisting of a single space.
+
+@vindex ORS
+@item ORS
+This is the output record separator. It is output at the end of every
+@code{print} statement. Its default value is @code{"\n"}.
+(@xref{Output Separators}.)
+
+@vindex RS
+@item RS
+This is @code{awk}'s input record separator. Its default value is a string
+containing a single newline character, which means that an input record
+consists of a single line of text.
+It can also be the null string, in which case records are separated by
+runs of blank lines, or a regexp, in which case records are separated by
+matches of the regexp in the input text.
+(@xref{Records, ,How Input is Split into Records}.)
+
+@vindex SUBSEP
+@item SUBSEP
+@code{SUBSEP} is the subscript separator. It has the default value of
+@code{"\034"}, and is used to separate the parts of the indices of a
+multi-dimensional array. Thus, the expression @code{@w{foo["A", "B"]}}
+really accesses @code{foo["A\034B"]}
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).
+@end table
+
+@node Auto-set, ARGC and ARGV, User-modified, Built-in Variables
+@section Built-in Variables that Convey Information
+@cindex built-in variables, convey information
+
+This is an alphabetical list of the variables that are set
+automatically by @code{awk} on certain occasions in order to provide
+information to your program. Those variables that are specific to
+@code{gawk} are marked with an asterisk, @samp{*}.
+
+@table @code
+@vindex ARGC
+@vindex ARGV
+@item ARGC
+@itemx ARGV
+The command-line arguments available to @code{awk} programs are stored in
+an array called @code{ARGV}. @code{ARGC} is the number of command-line
+arguments present. @xref{Other Arguments, ,Other Command Line Arguments}.
+Unlike most @code{awk} arrays,
+@code{ARGV} is indexed from zero to @code{ARGC} @minus{} 1. For example:
+
+@example
+@group
+$ awk 'BEGIN @{
+> for (i = 0; i < ARGC; i++)
+> print ARGV[i]
+> @}' inventory-shipped BBS-list
+@print{} awk
+@print{} inventory-shipped
+@print{} BBS-list
+@end group
+@end example
+
+@noindent
+In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
+contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
+@code{"BBS-list"}. The value of @code{ARGC} is three, one more than the
+index of the last element in @code{ARGV}, since the elements are numbered
+from zero.
+
+The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing
+the array from zero to @code{ARGC} @minus{} 1, are derived from the C language's
+method of accessing command line arguments.
+@xref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}, for information
+about how @code{awk} uses these variables.
+
+@vindex ARGIND
+@item ARGIND *
+The index in @code{ARGV} of the current file being processed.
+Every time @code{gawk} opens a new data file for processing, it sets
+@code{ARGIND} to the index in @code{ARGV} of the file name.
+When @code{gawk} is processing the input files, it is always
+true that @samp{FILENAME == ARGV[ARGIND]}.
+
+This variable is useful in file processing; it allows you to tell how far
+along you are in the list of data files, and to distinguish between
+successive instances of the same filename on the command line.
+
+While you can change the value of @code{ARGIND} within your @code{awk}
+program, @code{gawk} will automatically set it to a new value when the
+next file is opened.
+
+This variable is a @code{gawk} extension. In other @code{awk} implementations,
+or if @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+it is not special.
+
+@vindex ENVIRON
+@item ENVIRON
+An associative array that contains the values of the environment. The array
+indices are the environment variable names; the values are the values of
+the particular environment variables. For example,
+@code{ENVIRON["HOME"]} might be @file{/home/arnold}. Changing this array
+does not affect the environment passed on to any programs that
+@code{awk} may spawn via redirection or the @code{system} function.
+(In a future version of @code{gawk}, it may do so.)
+
+Some operating systems may not have environment variables.
+On such systems, the @code{ENVIRON} array is empty (except for
+@w{@code{ENVIRON["AWKPATH"]}}).
+
+@vindex ERRNO
+@item ERRNO *
+If a system error occurs while doing a redirection for @code{getline},
+during a read for @code{getline}, or during a @code{close} operation,
+then @code{ERRNO} will contain a string describing the error.
+
+This variable is a @code{gawk} extension. In other @code{awk} implementations,
+or if @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+it is not special.
+
+@cindex dark corner
+@vindex FILENAME
+@item FILENAME
+This is the name of the file that @code{awk} is currently reading.
+When no data files are listed on the command line, @code{awk} reads
+from the standard input, and @code{FILENAME} is set to @code{"-"}.
+@code{FILENAME} is changed each time a new file is read
+(@pxref{Reading Files, ,Reading Input Files}).
+Inside a @code{BEGIN} rule, the value of @code{FILENAME} is
+@code{""}, since there are no input files being processed
+yet.@footnote{Some early implementations of Unix @code{awk} initialized
+@code{FILENAME} to @code{"-"}, even if there were data files to be
+processed. This behavior was incorrect, and should not be relied
+upon in your programs.} (d.c.)
+
+@vindex FNR
+@item FNR
+@code{FNR} is the current record number in the current file. @code{FNR} is
+incremented each time a new record is read
+(@pxref{Getline, ,Explicit Input with @code{getline}}). It is reinitialized
+to zero each time a new input file is started.
+
+@vindex NF
+@item NF
+@code{NF} is the number of fields in the current input record.
+@code{NF} is set each time a new record is read, when a new field is
+created, or when @code{$0} changes (@pxref{Fields, ,Examining Fields}).
+
+@vindex NR
+@item NR
+This is the number of input records @code{awk} has processed since
+the beginning of the program's execution
+(@pxref{Records, ,How Input is Split into Records}).
+@code{NR} is set each time a new record is read.
+
+@vindex RLENGTH
+@item RLENGTH
+@code{RLENGTH} is the length of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RLENGTH} is set by invoking the @code{match} function. Its value
+is the length of the matched string, or @minus{}1 if no match was found.
+
+@vindex RSTART
+@item RSTART
+@code{RSTART} is the start-index in characters of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RSTART} is set by invoking the @code{match} function. Its value
+is the position of the string where the matched substring starts, or zero
+if no match was found.
+
+@vindex RT
+@item RT *
+@code{RT} is set each time a record is read. It contains the input text
+that matched the text denoted by @code{RS}, the record separator.
+
+This variable is a @code{gawk} extension. In other @code{awk} implementations,
+or if @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+it is not special.
+@end table
+
+@cindex dark corner
+A side note about @code{NR} and @code{FNR}.
+@code{awk} simply increments both of these variables
+each time it reads a record, instead of setting them to the absolute
+value of the number of records read. This means that your program can
+change these variables, and their new values will be incremented for
+each record (d.c.). For example:
+
+@example
+@group
+$ echo '1
+> 2
+> 3
+> 4' | awk 'NR == 2 @{ NR = 17 @}
+> @{ print NR @}'
+@print{} 1
+@print{} 17
+@print{} 18
+@print{} 19
+@end group
+@end example
+
+@noindent
+Before @code{FNR} was added to the @code{awk} language
+(@pxref{V7/SVR3.1, ,Major Changes between V7 and SVR3.1}),
+many @code{awk} programs used this feature to track the number of
+records in a file by resetting @code{NR} to zero when @code{FILENAME}
+changed.
+
+@node ARGC and ARGV, , Auto-set, Built-in Variables
+@section Using @code{ARGC} and @code{ARGV}
+
+In @ref{Auto-set, , Built-in Variables that Convey Information},
+you saw this program describing the information contained in @code{ARGC}
+and @code{ARGV}:
+
+@example
+@group
+$ awk 'BEGIN @{
+> for (i = 0; i < ARGC; i++)
+> print ARGV[i]
+> @}' inventory-shipped BBS-list
+@print{} awk
+@print{} inventory-shipped
+@print{} BBS-list
+@end group
+@end example
+
+@noindent
+In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
+contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
+@code{"BBS-list"}.
+
+Notice that the @code{awk} program is not entered in @code{ARGV}. The
+other special command line options, with their arguments, are also not
+entered. But variable assignments on the command line @emph{are}
+treated as arguments, and do show up in the @code{ARGV} array.
+
+Your program can alter @code{ARGC} and the elements of @code{ARGV}.
+Each time @code{awk} reaches the end of an input file, it uses the next
+element of @code{ARGV} as the name of the next input file. By storing a
+different string there, your program can change which files are read.
+You can use @code{"-"} to represent the standard input. By storing
+additional elements and incrementing @code{ARGC} you can cause
+additional files to be read.
+
+If you decrease the value of @code{ARGC}, that eliminates input files
+from the end of the list. By recording the old value of @code{ARGC}
+elsewhere, your program can treat the eliminated arguments as
+something other than file names.
+
+To eliminate a file from the middle of the list, store the null string
+(@code{""}) into @code{ARGV} in place of the file's name. As a
+special feature, @code{awk} ignores file names that have been
+replaced with the null string.
+You may also use the @code{delete} statement to remove elements from
+@code{ARGV} (@pxref{Delete, ,The @code{delete} Statement}).
+
+All of these actions are typically done from the @code{BEGIN} rule,
+before actual processing of the input begins.
+@xref{Split Program, ,Splitting a Large File Into Pieces}, and see
+@ref{Tee Program, ,Duplicating Output Into Multiple Files}, for an example
+of each way of removing elements from @code{ARGV}.
+
+The following fragment processes @code{ARGV} in order to examine, and
+then remove, command line options.
+
+@example
+@group
+BEGIN @{
+ for (i = 1; i < ARGC; i++) @{
+ if (ARGV[i] == "-v")
+ verbose = 1
+ else if (ARGV[i] == "-d")
+ debug = 1
+@end group
+@group
+ else if (ARGV[i] ~ /^-./) @{
+ e = sprintf("%s: unrecognized option -- %c",
+ ARGV[0], substr(ARGV[i], 2, 1))
+ print e > "/dev/stderr"
+ @} else
+ break
+ delete ARGV[i]
+ @}
+@}
+@end group
+@end example
+
+To actually get the options into the @code{awk} program, you have to
+end the @code{awk} options with @samp{--}, and then supply your options,
+like so:
+
+@example
+awk -f myprog -- -v -d file1 file2 @dots{}
+@end example
+
+@cindex differences between @code{gawk} and @code{awk}
+This is not necessary in @code{gawk}: Unless @samp{--posix} has been
+specified, @code{gawk} silently puts any unrecognized options into
+@code{ARGV} for the @code{awk} program to deal with.
+
+As soon as it
+sees an unknown option, @code{gawk} stops looking for other options it might
+otherwise recognize. The above example with @code{gawk} would be:
+
+@example
+gawk -f myprog -d -v file1 file2 @dots{}
+@end example
+
+@noindent
+Since @samp{-d} is not a valid @code{gawk} option, the following @samp{-v}
+is passed on to the @code{awk} program.
+
+@node Arrays, Built-in, Built-in Variables, Top
+@chapter Arrays in @code{awk}
+
+An @dfn{array} is a table of values, called @dfn{elements}. The
+elements of an array are distinguished by their indices. @dfn{Indices}
+may be either numbers or strings. @code{awk} maintains a single set
+of names that may be used for naming variables, arrays and functions
+(@pxref{User-defined, ,User-defined Functions}).
+Thus, you cannot have a variable and an array with the same name in the
+same @code{awk} program.
+
+@menu
+* Array Intro:: Introduction to Arrays
+* Reference to Elements:: How to examine one element of an array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example:: Basic Example of an Array
+* Scanning an Array:: A variation of the @code{for} statement. It
+ loops through the indices of an array's
+ existing elements.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
+* Numeric Array Subscripts:: How to use numbers as subscripts in
+ @code{awk}.
+* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Multi-dimensional:: Emulating multi-dimensional arrays in
+ @code{awk}.
+* Multi-scanning:: Scanning multi-dimensional arrays.
+@end menu
+
+@node Array Intro, Reference to Elements, Arrays, Arrays
+@section Introduction to Arrays
+
+@cindex arrays
+The @code{awk} language provides one-dimensional @dfn{arrays} for storing groups
+of related strings or numbers.
+
+Every @code{awk} array must have a name. Array names have the same
+syntax as variable names; any valid variable name would also be a valid
+array name. But you cannot use one name in both ways (as an array and
+as a variable) in one @code{awk} program.
+
+Arrays in @code{awk} superficially resemble arrays in other programming
+languages; but there are fundamental differences. In @code{awk}, you
+don't need to specify the size of an array before you start to use it.
+Additionally, any number or string in @code{awk} may be used as an
+array index, not just consecutive integers.
+
+In most other languages, you have to @dfn{declare} an array and specify
+how many elements or components it contains. In such languages, the
+declaration causes a contiguous block of memory to be allocated for that
+many elements. An index in the array usually must be a non-negative integer; for
+example, the index zero specifies the first element in the array, which is
+actually stored at the beginning of the block of memory. Index one
+specifies the second element, which is stored in memory right after the
+first element, and so on. It is impossible to add more elements to the
+array, because it has room for only as many elements as you declared.
+(Some languages allow arbitrary starting and ending indices,
+e.g., @samp{15 .. 27}, but the size of the array is still fixed when
+the array is declared.)
+
+A contiguous array of four elements might look like this,
+conceptually, if the element values are eight, @code{"foo"},
+@code{""} and 30:
+
+@iftex
+@c from Karl Berry, much thanks for the help.
+@tex
+\bigskip % space above the table (about 1 linespace)
+\offinterlineskip
+\newdimen\width \width = 1.5cm
+\newdimen\hwidth \hwidth = 4\width \advance\hwidth by 2pt % 5 * 0.4pt
+\centerline{\vbox{
+\halign{\strut\hfil\ignorespaces#&&\vrule#&\hbox to\width{\hfil#\unskip\hfil}\cr
+\noalign{\hrule width\hwidth}
+ &&{\tt 8} &&{\tt "foo"} &&{\tt ""} &&{\tt 30} &&\quad value\cr
+\noalign{\hrule width\hwidth}
+\noalign{\smallskip}
+ &\omit&0&\omit &1 &\omit&2 &\omit&3 &\omit&\quad index\cr
+}
+}}
+@end tex
+@end iftex
+@ifinfo
+@example
++---------+---------+--------+---------+
+| 8 | "foo" | "" | 30 | @r{value}
++---------+---------+--------+---------+
+ 0 1 2 3 @r{index}
+@end example
+@end ifinfo
+
+@noindent
+Only the values are stored; the indices are implicit from the order of
+the values. Eight is the value at index zero, because eight appears in the
+position with zero elements before it.
+
+@cindex arrays, definition of
+@cindex associative arrays
+@cindex arrays, associative
+Arrays in @code{awk} are different: they are @dfn{associative}. This means
+that each array is a collection of pairs: an index, and its corresponding
+array element value:
+
+@example
+@r{Element} 4 @r{Value} 30
+@r{Element} 2 @r{Value} "foo"
+@r{Element} 1 @r{Value} 8
+@r{Element} 3 @r{Value} ""
+@end example
+
+@noindent
+We have shown the pairs in jumbled order because their order is irrelevant.
+
+One advantage of associative arrays is that new pairs can be added
+at any time. For example, suppose we add to the above array a tenth element
+whose value is @w{@code{"number ten"}}. The result is this:
+
+@example
+@r{Element} 10 @r{Value} "number ten"
+@r{Element} 4 @r{Value} 30
+@r{Element} 2 @r{Value} "foo"
+@r{Element} 1 @r{Value} 8
+@r{Element} 3 @r{Value} ""
+@end example
+
+@noindent
+@cindex sparse arrays
+@cindex arrays, sparse
+Now the array is @dfn{sparse}, which just means some indices are missing:
+it has elements 1--4 and 10, but doesn't have elements 5, 6, 7, 8, or 9.
+@c ok, I should spell out the above, but ...
+
+Another consequence of associative arrays is that the indices don't
+have to be positive integers. Any number, or even a string, can be
+an index. For example, here is an array which translates words from
+English into French:
+
+@example
+@r{Element} "dog" @r{Value} "chien"
+@r{Element} "cat" @r{Value} "chat"
+@r{Element} "one" @r{Value} "un"
+@r{Element} 1 @r{Value} "un"
+@end example
+
+@noindent
+Here we decided to translate the number one in both spelled-out and
+numeric form---thus illustrating that a single array can have both
+numbers and strings as indices.
+(In fact, array subscripts are always strings; this is discussed
+in more detail in
+@ref{Numeric Array Subscripts, ,Using Numbers to Subscript Arrays}.)
+
+@cindex Array subscripts and @code{IGNORECASE}
+@cindex @code{IGNORECASE} and array subscripts
+@vindex IGNORECASE
+The value of @code{IGNORECASE} has no effect upon array subscripting.
+You must use the exact same string value to retrieve an array element
+as you used to store it.
+
+When @code{awk} creates an array for you, e.g., with the @code{split}
+built-in function,
+that array's indices are consecutive integers starting at one.
+(@xref{String Functions, ,Built-in Functions for String Manipulation}.)
+
+@node Reference to Elements, Assigning Elements, Array Intro, Arrays
+@section Referring to an Array Element
+@cindex array reference
+@cindex element of array
+@cindex reference to array
+
+The principal way of using an array is to refer to one of its elements.
+An array reference is an expression which looks like this:
+
+@example
+@var{array}[@var{index}]
+@end example
+
+@noindent
+Here, @var{array} is the name of an array. The expression @var{index} is
+the index of the element of the array that you want.
+
+The value of the array reference is the current value of that array
+element. For example, @code{foo[4.3]} is an expression for the element
+of array @code{foo} at index @samp{4.3}.
+
+If you refer to an array element that has no recorded value, the value
+of the reference is @code{""}, the null string. This includes elements
+to which you have not assigned any value, and elements that have been
+deleted (@pxref{Delete, ,The @code{delete} Statement}). Such a reference
+automatically creates that array element, with the null string as its value.
+(In some cases, this is unfortunate, because it might waste memory inside
+@code{awk}.)
+
+@cindex arrays, presence of elements
+@cindex arrays, the @code{in} operator
+You can find out if an element exists in an array at a certain index with
+the expression:
+
+@example
+@var{index} in @var{array}
+@end example
+
+@noindent
+This expression tests whether or not the particular index exists,
+without the side effect of creating that element if it is not present.
+The expression has the value one (true) if @code{@var{array}[@var{index}]}
+exists, and zero (false) if it does not exist.
+
+For example, to test whether the array @code{frequencies} contains the
+index @samp{2}, you could write this statement:
+
+@example
+if (2 in frequencies)
+ print "Subscript 2 is present."
+@end example
+
+Note that this is @emph{not} a test of whether or not the array
+@code{frequencies} contains an element whose @emph{value} is two.
+(There is no way to do that except to scan all the elements.) Also, this
+@emph{does not} create @code{frequencies[2]}, while the following
+(incorrect) alternative would do so:
+
+@example
+if (frequencies[2] != "")
+ print "Subscript 2 is present."
+@end example
+
+@node Assigning Elements, Array Example, Reference to Elements, Arrays
+@section Assigning Array Elements
+@cindex array assignment
+@cindex element assignment
+
+Array elements are lvalues: they can be assigned values just like
+@code{awk} variables:
+
+@example
+@var{array}[@var{subscript}] = @var{value}
+@end example
+
+@noindent
+Here @var{array} is the name of your array. The expression
+@var{subscript} is the index of the element of the array that you want
+to assign a value. The expression @var{value} is the value you are
+assigning to that element of the array.
+
+@node Array Example, Scanning an Array, Assigning Elements, Arrays
+@section Basic Array Example
+
+The following program takes a list of lines, each beginning with a line
+number, and prints them out in order of line number. The line numbers are
+not in order, however, when they are first read: they are scrambled. This
+program sorts the lines by making an array using the line numbers as
+subscripts. It then prints out the lines in sorted order of their numbers.
+It is a very simple program, and gets confused if it encounters repeated
+numbers, gaps, or lines that don't begin with a number.
+
+@example
+@c file eg/misc/arraymax.awk
+@{
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
+@}
+
+END @{
+ for (x = 1; x <= max; x++)
+ print arr[x]
+@}
+@c endfile
+@end example
+
+The first rule keeps track of the largest line number seen so far;
+it also stores each line into the array @code{arr}, at an index that
+is the line's number.
+
+The second rule runs after all the input has been read, to print out
+all the lines.
+
+When this program is run with the following input:
+
+@example
+@group
+@c file eg/misc/arraymax.data
+5 I am the Five man
+2 Who are you? The new number two!
+4 . . . And four on the floor
+1 Who is number one?
+3 I three you.
+@c endfile
+@end group
+@end example
+
+@noindent
+its output is this:
+
+@example
+1 Who is number one?
+2 Who are you? The new number two!
+3 I three you.
+4 . . . And four on the floor
+5 I am the Five man
+@end example
+
+If a line number is repeated, the last line with a given number overrides
+the others.
+
+Gaps in the line numbers can be handled with an easy improvement to the
+program's @code{END} rule:
+
+@example
+END @{
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
+@}
+@end example
+
+@node Scanning an Array, Delete, Array Example, Arrays
+@section Scanning All Elements of an Array
+@cindex @code{for (x in @dots{})}
+@cindex arrays, special @code{for} statement
+@cindex scanning an array
+
+In programs that use arrays, you often need a loop that executes
+once for each element of an array. In other languages, where arrays are
+contiguous and indices are limited to positive integers, this is
+easy: you can
+find all the valid indices by counting from the lowest index
+up to the highest. This
+technique won't do the job in @code{awk}, since any number or string
+can be an array index. So @code{awk} has a special kind of @code{for}
+statement for scanning an array:
+
+@example
+for (@var{var} in @var{array})
+ @var{body}
+@end example
+
+@noindent
+This loop executes @var{body} once for each index in @var{array} that your
+program has previously used, with the
+variable @var{var} set to that index.
+
+Here is a program that uses this form of the @code{for} statement. The
+first rule scans the input records and notes which words appear (at
+least once) in the input, by storing a one into the array @code{used} with
+the word as index. The second rule scans the elements of @code{used} to
+find all the distinct words that appear in the input. It prints each
+word that is more than 10 characters long, and also prints the number of
+such words. @xref{String Functions, ,Built-in Functions for String Manipulation}, for more information
+on the built-in function @code{length}.
+
+@example
+# Record a 1 for each word that is used at least once.
+@{
+ for (i = 1; i <= NF; i++)
+ used[$i] = 1
+@}
+
+# Find number of distinct words more than 10 characters long.
+END @{
+ for (x in used)
+ if (length(x) > 10) @{
+ ++num_long_words
+ print x
+ @}
+ print num_long_words, "words longer than 10 characters"
+@}
+@end example
+
+@noindent
+@xref{Word Sorting, ,Generating Word Usage Counts},
+for a more detailed example of this type.
+
+The order in which elements of the array are accessed by this statement
+is determined by the internal arrangement of the array elements within
+@code{awk} and cannot be controlled or changed. This can lead to
+problems if new elements are added to @var{array} by statements in
+the loop body; you cannot predict whether or not the @code{for} loop will
+reach them. Similarly, changing @var{var} inside the loop may produce
+strange results. It is best to avoid such things.
+
+@node Delete, Numeric Array Subscripts, Scanning an Array, Arrays
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements of arrays
+@cindex removing elements of arrays
+@cindex arrays, deleting an element
+
+You can remove an individual element of an array using the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index}]
+@end example
+
+Once you have deleted an array element, you can no longer obtain any
+value the element once had. It is as if you had never referred
+to it and had never given it any value.
+
+Here is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+
+If you delete an element, a subsequent @code{for} statement to scan the array
+will not report that element, and the @code{in} operator to check for
+the presence of that element will return zero (i.e.@: false):
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+It is important to note that deleting an element is @emph{not} the
+same as assigning it a null value (the empty string, @code{""}).
+
+@example
+foo[4] = ""
+if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+@end example
+
+It is not an error to delete an element that does not exist.
+
+@cindex arrays, deleting entire contents
+@cindex deleting entire arrays
+@cindex differences between @code{gawk} and @code{awk}
+You can delete all the elements of an array with a single statement,
+by leaving off the subscript in the @code{delete} statement.
+
+@example
+delete @var{array}
+@end example
+
+This ability is a @code{gawk} extension; it is not available in
+compatibility mode (@pxref{Options, ,Command Line Options}).
+
+Using this version of the @code{delete} statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+@cindex portability issues
+The following statement provides a portable, but non-obvious way to clear
+out an array.
+
+@cindex Brennan, Michael
+@example
+@group
+# thanks to Michael Brennan for pointing this out
+split("", array)
+@end group
+@end example
+
+The @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation})
+clears out the target array first. This call asks it to split
+apart the null string. Since there is no data to split out, the
+function simply clears the array and then returns.
+
+@node Numeric Array Subscripts, Uninitialized Subscripts, Delete, Arrays
+@section Using Numbers to Subscript Arrays
+
+An important aspect of arrays to remember is that @emph{array subscripts
+are always strings}. If you use a numeric value as a subscript,
+it will be converted to a string value before it is used for subscripting
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@cindex conversions, during subscripting
+@cindex numbers, used as subscripts
+@vindex CONVFMT
+This means that the value of the built-in variable @code{CONVFMT} can potentially
+affect how your program accesses elements of an array. For example:
+
+@example
+xyz = 12.153
+data[xyz] = 1
+CONVFMT = "%2.2f"
+@group
+if (xyz in data)
+ printf "%s is in data\n", xyz
+else
+ printf "%s is not in data\n", xyz
+@end group
+@end example
+
+@noindent
+This prints @samp{12.15 is not in data}. The first statement gives
+@code{xyz} a numeric value. Assigning to
+@code{data[xyz]} subscripts @code{data} with the string value @code{"12.153"}
+(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}),
+and assigns one to @code{data["12.153"]}. The program then changes
+the value of @code{CONVFMT}. The test @samp{(xyz in data)} generates a new
+string value from @code{xyz}, this time @code{"12.15"}, since the value of
+@code{CONVFMT} only allows two digits after the decimal point. This test fails,
+since @code{"12.15"} is a different string from @code{"12.153"}.
+
+According to the rules for conversions
+(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer
+values are always converted to strings as integers, no matter what the
+value of @code{CONVFMT} may happen to be. So the usual case of:
+
+@example
+for (i = 1; i <= maxsub; i++)
+ @i{do something with} array[i]
+@end example
+
+@noindent
+will work, no matter what the value of @code{CONVFMT}.
+
+Like many things in @code{awk}, the majority of the time things work
+as you would expect them to work. But it is useful to have a precise
+knowledge of the actual rules, since sometimes they can have a subtle
+effect on your programs.
+
+@node Uninitialized Subscripts, Multi-dimensional, Numeric Array Subscripts, Arrays
+@section Using Uninitialized Variables as Subscripts
+
+@cindex uninitialized variables, as array subscripts
+@cindex array subscripts, uninitialized variables
+Suppose you want to print your input data in reverse order.
+A reasonable attempt at a program to do so (with some test
+data) might look like this:
+
+@example
+@group
+$ echo 'line 1
+> line 2
+> line 3' | awk '@{ l[lines] = $0; ++lines @}
+> END @{
+> for (i = lines-1; i >= 0; --i)
+> print l[i]
+> @}'
+@print{} line 3
+@print{} line 2
+@end group
+@end example
+
+Unfortunately, the very first line of input data did not come out in the
+output!
+
+At first glance, this program should have worked. The variable @code{lines}
+is uninitialized, and uninitialized variables have the numeric value zero.
+So, the value of @code{l[0]} should have been printed.
+
+The issue here is that subscripts for @code{awk} arrays are @strong{always}
+strings. And uninitialized variables, when used as strings, have the
+value @code{""}, not zero. Thus, @samp{line 1} ended up stored in
+@code{l[""]}.
+
+The following version of the program works correctly:
+
+@example
+@{ l[lines++] = $0 @}
+END @{
+ for (i = lines - 1; i >= 0; --i)
+ print l[i]
+@}
+@end example
+
+Here, the @samp{++} forces @code{lines} to be numeric, thus making
+the ``old value'' numeric zero, which is then converted to @code{"0"}
+as the array subscript.
+
+@cindex null string, as array subscript
+@cindex dark corner
+As we have just seen, even though it is somewhat unusual, the null string
+(@code{""}) is a valid array subscript (d.c.). If @samp{--lint} is provided
+on the command line (@pxref{Options, ,Command Line Options}),
+@code{gawk} will warn about the use of the null string as a subscript.
+
+@node Multi-dimensional, Multi-scanning, Uninitialized Subscripts, Arrays
+@section Multi-dimensional Arrays
+
+@cindex subscripts in arrays
+@cindex arrays, multi-dimensional subscripts
+@cindex multi-dimensional subscripts
+A multi-dimensional array is an array in which an element is identified
+by a sequence of indices, instead of a single index. For example, a
+two-dimensional array requires two indices. The usual way (in most
+languages, including @code{awk}) to refer to an element of a
+two-dimensional array named @code{grid} is with
+@code{grid[@var{x},@var{y}]}.
+
+@vindex SUBSEP
+Multi-dimensional arrays are supported in @code{awk} through
+concatenation of indices into one string. What happens is that
+@code{awk} converts the indices into strings
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) and
+concatenates them together, with a separator between them. This creates
+a single string that describes the values of the separate indices. The
+combined string is used as a single index into an ordinary,
+one-dimensional array. The separator used is the value of the built-in
+variable @code{SUBSEP}.
+
+For example, suppose we evaluate the expression @samp{foo[5,12] = "value"}
+when the value of @code{SUBSEP} is @code{"@@"}. The numbers five and 12 are
+converted to strings and
+concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus,
+the array element @code{foo["5@@12"]} is set to @code{"value"}.
+
+Once the element's value is stored, @code{awk} has no record of whether
+it was stored with a single index or a sequence of indices. The two
+expressions @samp{foo[5,12]} and @w{@samp{foo[5 SUBSEP 12]}} are always
+equivalent.
+
+The default value of @code{SUBSEP} is the string @code{"\034"},
+which contains a non-printing character that is unlikely to appear in an
+@code{awk} program or in most input data.
+
+The usefulness of choosing an unlikely character comes from the fact
+that index values that contain a string matching @code{SUBSEP} lead to
+combined strings that are ambiguous. Suppose that @code{SUBSEP} were
+@code{"@@"}; then @w{@samp{foo["a@@b", "c"]}} and @w{@samp{foo["a",
+"b@@c"]}} would be indistinguishable because both would actually be
+stored as @samp{foo["a@@b@@c"]}.
+
+You can test whether a particular index-sequence exists in a
+``multi-dimensional'' array with the same operator @samp{in} used for
+single-dimensional arrays. Instead of a single index as the left-hand operand,
+write the whole sequence of indices, separated by commas, in
+parentheses:
+
+@example
+(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+@end example
+
+The following example treats its input as a two-dimensional array of
+fields; it rotates this array 90 degrees clockwise and prints the
+result. It assumes that all lines have the same number of
+elements.
+
+@example
+@group
+awk '@{
+ if (max_nf < NF)
+ max_nf = NF
+ max_nr = NR
+ for (x = 1; x <= NF; x++)
+ vector[x, NR] = $x
+@}
+@end group
+
+@group
+END @{
+ for (x = 1; x <= max_nf; x++) @{
+ for (y = max_nr; y >= 1; --y)
+ printf("%s ", vector[x, y])
+ printf("\n")
+ @}
+@}'
+@end group
+@end example
+
+@noindent
+When given the input:
+
+@example
+@group
+1 2 3 4 5 6
+2 3 4 5 6 1
+3 4 5 6 1 2
+4 5 6 1 2 3
+@end group
+@end example
+
+@noindent
+it produces:
+
+@example
+@group
+4 3 2 1
+5 4 3 2
+6 5 4 3
+1 6 5 4
+2 1 6 5
+3 2 1 6
+@end group
+@end example
+
+@node Multi-scanning, , Multi-dimensional, Arrays
+@section Scanning Multi-dimensional Arrays
+
+There is no special @code{for} statement for scanning a
+``multi-dimensional'' array; there cannot be one, because in truth there
+are no multi-dimensional arrays or elements; there is only a
+multi-dimensional @emph{way of accessing} an array.
+
+However, if your program has an array that is always accessed as
+multi-dimensional, you can get the effect of scanning it by combining
+the scanning @code{for} statement
+(@pxref{Scanning an Array, ,Scanning All Elements of an Array}) with the
+@code{split} built-in function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+It works like this:
+
+@example
+for (combined in array) @{
+ split(combined, separate, SUBSEP)
+ @dots{}
+@}
+@end example
+
+@noindent
+This sets @code{combined} to
+each concatenated, combined index in the array, and splits it
+into the individual indices by breaking it apart where the value of
+@code{SUBSEP} appears. The split-out indices become the elements of
+the array @code{separate}.
+
+Thus, suppose you have previously stored a value in @code{array[1, "foo"]};
+then an element with index @code{"1\034foo"} exists in
+@code{array}. (Recall that the default value of @code{SUBSEP} is
+the character with code 034.) Sooner or later the @code{for} statement
+will find that index and do an iteration with @code{combined} set to
+@code{"1\034foo"}. Then the @code{split} function is called as
+follows:
+
+@example
+split("1\034foo", separate, "\034")
+@end example
+
+@noindent
+The result of this is to set @code{separate[1]} to @code{"1"} and
+@code{separate[2]} to @code{"foo"}. Presto, the original sequence of
+separate indices has been recovered.
+
+@node Built-in, User-defined, Arrays, Top
+@chapter Built-in Functions
+
+@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!!
+@cindex built-in functions
+@dfn{Built-in} functions are functions that are always available for
+your @code{awk} program to call. This chapter defines all the built-in
+functions in @code{awk}; some of them are mentioned in other sections,
+but they are summarized here for your convenience. (You can also define
+new functions yourself. @xref{User-defined, ,User-defined Functions}.)
+
+@menu
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers, including
+ @code{int}, @code{sin} and @code{rand}.
+* String Functions:: Functions for string manipulation, such as
+ @code{split}, @code{match}, and
+ @code{sprintf}.
+* I/O Functions:: Functions for files and shell commands.
+* Time Functions:: Functions for dealing with time stamps.
+@end menu
+
+@node Calling Built-in, Numeric Functions, Built-in, Built-in
+@section Calling Built-in Functions
+
+To call a built-in function, write the name of the function followed
+by arguments in parentheses. For example, @samp{atan2(y + z, 1)}
+is a call to the function @code{atan2}, with two arguments.
+
+Whitespace is ignored between the built-in function name and the
+open-parenthesis, but we recommend that you avoid using whitespace
+there. User-defined functions do not permit whitespace in this way, and
+you will find it easier to avoid mistakes by following a simple
+convention which always works: no whitespace after a function name.
+
+@cindex differences between @code{gawk} and @code{awk}
+Each built-in function accepts a certain number of arguments.
+In some cases, arguments can be omitted. The defaults for omitted
+arguments vary from function to function and are described under the
+individual functions. In some @code{awk} implementations, extra
+arguments given to built-in functions are ignored. However, in @code{gawk},
+it is a fatal error to give extra arguments to a built-in function.
+
+When a function is called, expressions that create the function's actual
+parameters are evaluated completely before the function call is performed.
+For example, in the code fragment:
+
+@example
+i = 4
+j = sqrt(i++)
+@end example
+
+@noindent
+the variable @code{i} is set to five before @code{sqrt} is called
+with a value of four for its actual parameter.
+
+@cindex evaluation, order of
+@cindex order of evaluation
+The order of evaluation of the expressions used for the function's
+parameters is undefined. Thus, you should not write programs that
+assume that parameters are evaluated from left to right or from
+right to left. For example,
+
+@example
+i = 5
+j = atan2(i++, i *= 2)
+@end example
+
+If the order of evaluation is left to right, then @code{i} first becomes
+six, and then 12, and @code{atan2} is called with the two arguments five
+and 12 (@code{i++} yields the value @code{i} had before the increment).
+But if the order of evaluation is right to left, @code{i}
+first becomes 10, and then 11, and @code{atan2} is called with the
+two arguments 10 and 10.
+
+@node Numeric Functions, String Functions, Calling Built-in, Built-in
+@section Numeric Built-in Functions
+
+Here is a full list of built-in functions that work with numbers.
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item int(@var{x})
+@findex int
+This produces the nearest integer to @var{x} that lies between @var{x} and
+zero; that is, @var{x} truncated toward zero.
+
+For example, @code{int(3)} is three, @code{int(3.9)} is three, @code{int(-3.9)}
+is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
+
+@item sqrt(@var{x})
+@findex sqrt
+This gives you the positive square root of @var{x}. It reports an error
+if @var{x} is negative. Thus, @code{sqrt(4)} is two.
+
+@item exp(@var{x})
+@findex exp
+This gives you the exponential of @var{x} (@code{e ^ @var{x}}), or reports
+an error if @var{x} is out of range. The range of values @var{x} can have
+depends on your machine's floating point representation.
+
+@item log(@var{x})
+@findex log
+This gives you the natural logarithm of @var{x}, if @var{x} is positive;
+otherwise, it reports an error.
+
+@item sin(@var{x})
+@findex sin
+This gives you the sine of @var{x}, with @var{x} in radians.
+
+@item cos(@var{x})
+@findex cos
+This gives you the cosine of @var{x}, with @var{x} in radians.
+
+@item atan2(@var{y}, @var{x})
+@findex atan2
+This gives you the arctangent of @code{@var{y} / @var{x}} in radians.
+
+@item rand()
+@findex rand
+This gives you a random number. The values of @code{rand} are
+uniformly-distributed between zero and one.
+The value is never zero and never one.
+
+Often you want random integers instead. Here is a user-defined function
+you can use to obtain a random non-negative integer less than @var{n}:
+
+@example
+function randint(n) @{
+ return int(n * rand())
+@}
+@end example
+
+@noindent
+The multiplication produces a random real number greater than zero and less
+than @code{n}. We then make it an integer (using @code{int}) between zero
+and @code{n} @minus{} 1, inclusive.
+
+Here is an example where a similar function is used to produce
+random integers between one and @var{n}. This program
+prints a new random number for each input record.
+
+@example
+@group
+awk '
+# Function to roll a simulated die.
+function roll(n) @{ return 1 + int(rand() * n) @}
+@end group
+
+@group
+# Roll 3 six-sided dice and
+# print total number of points.
+@{
+ printf("%d points\n",
+ roll(6)+roll(6)+roll(6))
+@}'
+@end group
+@end example
+
+@cindex seed for random numbers
+@cindex random numbers, seed of
+@comment MAWK uses a different seed each time.
+@strong{Caution:} In most @code{awk} implementations, including @code{gawk},
+@code{rand} starts generating numbers from the same
+starting number, or @dfn{seed}, each time you run @code{awk}. Thus,
+a program will generate the same results each time you run it.
+The numbers are random within one @code{awk} run, but predictable
+from run to run. This is convenient for debugging, but if you want
+a program to do different things each time it is used, you must change
+the seed to a value that will be different in each run. To do this,
+use @code{srand}.
+
+@item srand(@r{[}@var{x}@r{]})
+@findex srand
+The function @code{srand} sets the starting point, or seed,
+for generating random numbers to the value @var{x}.
+
+Each seed value leads to a particular sequence of random
+numbers.@footnote{Computer-generated random numbers really are not truly
+random. They are technically known as ``pseudo-random.'' This means
+that while the numbers in a sequence appear to be random, you can in
+fact generate the same sequence of random numbers over and over again.}
+Thus, if you set the seed to the same value a second time, you will get
+the same sequence of random numbers again.
+
+If you omit the argument @var{x}, as in @code{srand()}, then the current
+date and time of day are used for a seed. This is the way to get random
+numbers that are truly unpredictable.
+
+The return value of @code{srand} is the previous seed. This makes it
+easy to keep track of the seeds for use in consistently reproducing
+sequences of random numbers.
+@end table
+
+@node String Functions, I/O Functions, Numeric Functions, Built-in
+@section Built-in Functions for String Manipulation
+
+The functions in this section look at or change the text of one or more
+strings.
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item index(@var{in}, @var{find})
+@findex index
+This searches the string @var{in} for the first occurrence of the string
+@var{find}, and returns the position in characters where that occurrence
+begins in the string @var{in}. For example:
+
+@example
+$ awk 'BEGIN @{ print index("peanut", "an") @}'
+@print{} 3
+@end example
+
+@noindent
+If @var{find} is not found, @code{index} returns zero.
+(Remember that string indices in @code{awk} start at one.)
+
+@item length(@r{[}@var{string}@r{]})
+@findex length
+This gives you the number of characters in @var{string}. If
+@var{string} is a number, the length of the digit string representing
+that number is returned. For example, @code{length("abcde")} is five. By
+contrast, @code{length(15 * 35)} works out to three. How? Well, 15 * 35 =
+525, and 525 is then converted to the string @code{"525"}, which has
+three characters.
+
+If no argument is supplied, @code{length} returns the length of @code{$0}.
+
+@cindex historical features
+@cindex portability issues
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+In older versions of @code{awk}, you could call the @code{length} function
+without any parentheses. Doing so is marked as ``deprecated'' in the
+POSIX standard. This means that while you can do this in your
+programs, it is a feature that can eventually be removed from a future
+version of the standard. Therefore, for maximal portability of your
+@code{awk} programs, you should always supply the parentheses.
+
+@item match(@var{string}, @var{regexp})
+@findex match
+The @code{match} function searches the string, @var{string}, for the
+longest, leftmost substring matched by the regular expression,
+@var{regexp}. It returns the character position, or @dfn{index}, of
+where that substring begins (one, if it starts at the beginning of
+@var{string}). If no match is found, it returns zero.
+
+@vindex RSTART
+@vindex RLENGTH
+The @code{match} function sets the built-in variable @code{RSTART} to
+the index. It also sets the built-in variable @code{RLENGTH} to the
+length in characters of the matched substring. If no match is found,
+@code{RSTART} is set to zero, and @code{RLENGTH} to @minus{}1.
+
+For example:
+
+@example
+@group
+@c file eg/misc/findpat.sh
+awk '@{
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", \
+ where, "in", $0
+ @}
+@}'
+@c endfile
+@end group
+@end example
+
+@noindent
+This program looks for lines that match the regular expression stored in
+the variable @code{regex}. This regular expression can be changed. If the
+first word on a line is @samp{FIND}, @code{regex} is changed to be the
+second word on that line. Therefore, given:
+
+@example
+@c file eg/misc/findpat.data
+FIND ru+n
+My program runs
+but not very quickly
+FIND Melvin
+JF+KM
+This line is property of Reality Engineering Co.
+Melvin was here.
+@c endfile
+@end example
+
+@noindent
+@code{awk} prints:
+
+@example
+Match of ru+n found at 12 in My program runs
+Match of Melvin found at 1 in Melvin was here.
+@end example
+
+@item split(@var{string}, @var{array} @r{[}, @var{fieldsep}@r{]})
+@findex split
+This divides @var{string} into pieces separated by @var{fieldsep},
+and stores the pieces in @var{array}. The first piece is stored in
+@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
+forth. The string value of the third argument, @var{fieldsep}, is
+a regexp describing where to split @var{string} (much as @code{FS} can
+be a regexp describing where to split input records). If
+the @var{fieldsep} is omitted, the value of @code{FS} is used.
+@code{split} returns the number of elements created.
+
+The @code{split} function splits strings into pieces in a
+manner similar to the way input lines are split into fields. For example:
+
+@example
+split("cul-de-sac", a, "-")
+@end example
+
+@noindent
+splits the string @samp{cul-de-sac} into three fields using @samp{-} as the
+separator. It sets the contents of the array @code{a} as follows:
+
+@example
+a[1] = "cul"
+a[2] = "de"
+a[3] = "sac"
+@end example
+
+@noindent
+The value returned by this call to @code{split} is three.
+
+As with input field-splitting, when the value of @var{fieldsep} is
+@w{@code{" "}}, leading and trailing whitespace is ignored, and the elements
+are separated by runs of whitespace.
+
+@cindex differences between @code{gawk} and @code{awk}
+Also as with input field-splitting, if @var{fieldsep} is the null string, each
+individual character in the string is split into its own array element.
+(This is a @code{gawk}-specific extension.)
+
+@cindex dark corner
+Recent implementations of @code{awk}, including @code{gawk}, allow
+the third argument to be a regexp constant (@code{/abc/}), as well as a
+string (d.c.). The POSIX standard allows this as well.
+
+Before splitting the string, @code{split} deletes any previously existing
+elements in the array @var{array} (d.c.).
+
+@item sprintf(@var{format}, @var{expression1},@dots{})
+@findex sprintf
+This returns (without printing) the string that @code{printf} would
+have printed out with the same arguments
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+For example:
+
+@example
+sprintf("pi = %.2f (approx.)", 22/7)
+@end example
+
+@noindent
+returns the string @w{@code{"pi = 3.14 (approx.)"}}.
+
+@ignore
+2e: For sub, gsub, and gensub, either here or in the "how much matches"
+ section, we need some explanation that it is possible to match the
+ null string when using closures like *. E.g.,
+
+ $ echo abc | awk '{ gsub(/m*/, "X"); print }'
+ @print{} XaXbXcX
+
+ Although this makes a certain amount of sense, it can be very
+ surprising.
+@end ignore
+
+@item sub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@findex sub
+The @code{sub} function alters the value of @var{target}.
+It searches this value, which is treated as a string, for the
+leftmost longest substring matched by the regular expression, @var{regexp},
+extending this match as far as possible. Then the entire string is
+changed by replacing the matched text with @var{replacement}.
+The modified string becomes the new value of @var{target}.
+
+This function is peculiar because @var{target} is not simply
+used to compute a value, and not just any expression will do: it
+must be a variable, field or array element, so that @code{sub} can
+store a modified value there. If this argument is omitted, then the
+default is to use and alter @code{$0}.
+
+For example:
+
+@example
+str = "water, water, everywhere"
+sub(/at/, "ith", str)
+@end example
+
+@noindent
+sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the
+leftmost, longest occurrence of @samp{at} with @samp{ith}.
+
+The @code{sub} function returns the number of substitutions made (either
+one or zero).
+
+If the special character @samp{&} appears in @var{replacement}, it
+stands for the precise substring that was matched by @var{regexp}. (If
+the regexp can match more than one string, then this precise substring
+may vary.) For example:
+
+@example
+awk '@{ sub(/candidate/, "& and his wife"); print @}'
+@end example
+
+@noindent
+changes the first occurrence of @samp{candidate} to @samp{candidate
+and his wife} on each input line.
+
+Here is another example:
+
+@example
+awk 'BEGIN @{
+ str = "daabaaa"
+ sub(/a*/, "c&c", str)
+ print str
+@}'
+@print{} dcaacbaaa
+@end example
+
+@noindent
+This shows how @samp{&} can represent a non-constant string, and also
+illustrates the ``leftmost, longest'' rule in regexp matching
+(@pxref{Leftmost Longest, ,How Much Text Matches?}).
+
+The effect of this special character (@samp{&}) can be turned off by putting a
+backslash before it in the string. As usual, to insert one backslash in
+the string, you must write two backslashes. Therefore, write @samp{\\&}
+in a string constant to include a literal @samp{&} in the replacement.
+For example, here is how to replace the first @samp{|} on each line with
+an @samp{&}:
+
+@example
+awk '@{ sub(/\|/, "\\&"); print @}'
+@end example
+
+@cindex @code{sub}, third argument of
+@cindex @code{gsub}, third argument of
+@strong{Note:} As mentioned above, the third argument to @code{sub} must
+be a variable, field or array reference.
+Some versions of @code{awk} allow the third argument to
+be an expression which is not an lvalue. In such a case, @code{sub}
+would still search for the pattern and return zero or one, but the result of
+the substitution (if any) would be thrown away because there is no place
+to put it. Such versions of @code{awk} accept expressions like
+this:
+
+@example
+sub(/USA/, "United States", "the USA and Canada")
+@end example
+
+@noindent
+For historical compatibility, @code{gawk} will accept erroneous code,
+such as in the above example. However, using any other non-changeable
+object as the third parameter will cause a fatal error, and your program
+will not run.
+
+@item gsub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@findex gsub
+This is similar to the @code{sub} function, except @code{gsub} replaces
+@emph{all} of the longest, leftmost, @emph{non-overlapping} matching
+substrings it can find. The @samp{g} in @code{gsub} stands for
+``global,'' which means replace everywhere. For example:
+
+@example
+awk '@{ gsub(/Britain/, "United Kingdom"); print @}'
+@end example
+
+@noindent
+replaces all occurrences of the string @samp{Britain} with @samp{United
+Kingdom} for all input records.
+
+The @code{gsub} function returns the number of substitutions made. If
+the variable to be searched and altered, @var{target}, is
+omitted, then the entire input record, @code{$0}, is used.
+
+As in @code{sub}, the characters @samp{&} and @samp{\} are special,
+and the third argument must be an lvalue.
+@end table
+
+@table @code
+@item gensub(@var{regexp}, @var{replacement}, @var{how} @r{[}, @var{target}@r{]})
+@findex gensub
+@code{gensub} is a general substitution function. Like @code{sub} and
+@code{gsub}, it searches the target string @var{target} for matches of
+the regular expression @var{regexp}. Unlike @code{sub} and
+@code{gsub}, the modified string is returned as the result of the
+function, and the original target string is @emph{not} changed. If
+@var{how} is a string beginning with @samp{g} or @samp{G}, then it
+replaces all matches of @var{regexp} with @var{replacement}.
+Otherwise, @var{how} is a number indicating which match of @var{regexp}
+to replace. If no @var{target} is supplied, @code{$0} is used instead.
+
+@code{gensub} provides an additional feature that is not available
+in @code{sub} or @code{gsub}: the ability to specify components of
+a regexp in the replacement text. This is done by using parentheses
+in the regexp to mark the components, and then specifying @samp{\@var{n}}
+in the replacement text, where @var{n} is a digit from one to nine.
+For example:
+
+@example
+@group
+$ gawk '
+> BEGIN @{
+> a = "abc def"
+> b = gensub(/(.+) (.+)/, "\\2 \\1", "g", a)
+> print b
+> @}'
+@print{} def abc
+@end group
+@end example
+
+@noindent
+As described above for @code{sub}, you must type two backslashes in order
+to get one into the string.
+
+In the replacement text, the sequence @samp{\0} represents the entire
+matched text, as does the character @samp{&}.
+
+This example shows how you can use the third argument to control
+which match of the regexp should be changed.
+
+@example
+$ echo a b c a b c |
+> gawk '@{ print gensub(/a/, "AA", 2) @}'
+@print{} a b c AA b c
+@end example
+
+In this case, @code{$0} is used as the default target string.
+@code{gensub} returns the new string as its result, which is
+passed directly to @code{print} for printing.
+
+If the @var{how} argument is a string that does not begin with @samp{g} or
+@samp{G}, or if it is a number that is less than zero, only one
+substitution is performed.
+
+@cindex differences between @code{gawk} and @code{awk}
+@code{gensub} is a @code{gawk} extension; it is not available
+in compatibility mode (@pxref{Options, ,Command Line Options}).
+
+@item substr(@var{string}, @var{start} @r{[}, @var{length}@r{]})
+@findex substr
+This returns a @var{length}-character-long substring of @var{string},
+starting at character number @var{start}. The first character of a
+string is character number one. For example,
+@code{substr("washington", 5, 3)} returns @code{"ing"}.
+
+If @var{length} is not present, this function returns the whole suffix of
+@var{string} that begins at character number @var{start}. For example,
+@code{substr("washington", 5)} returns @code{"ington"}. The whole
+suffix is also returned
+if @var{length} is greater than the number of characters remaining
+in the string, counting from character number @var{start}.
+
+@strong{Note:} The string returned by @code{substr} @emph{cannot} be
+assigned to. Thus, it is a mistake to attempt to change a portion of
+a string, like this:
+
+@example
+string = "abcdef"
+# try to get "abCDEf", won't work
+substr(string, 3, 3) = "CDE"
+@end example
+
+@noindent
+or to use @code{substr} as the third argument of @code{sub} or @code{gsub}:
+
+@example
+gsub(/xyz/, "pdq", substr($0, 5, 20)) # WRONG
+@end example
+
+@cindex case conversion
+@cindex conversion of case
+@item tolower(@var{string})
+@findex tolower
+This returns a copy of @var{string}, with each upper-case character
+in the string replaced with its corresponding lower-case character.
+Non-alphabetic characters are left unchanged. For example,
+@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
+
+@item toupper(@var{string})
+@findex toupper
+This returns a copy of @var{string}, with each lower-case character
+in the string replaced with its corresponding upper-case character.
+Non-alphabetic characters are left unchanged. For example,
+@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
+@end table
+
+@c fakenode --- for prepinfo
+@subheading More About @samp{\} and @samp{&} with @code{sub}, @code{gsub} and @code{gensub}
+
+@cindex escape processing, @code{sub} et al.
+When using @code{sub}, @code{gsub} or @code{gensub}, and trying to get literal
+backslashes and ampersands into the replacement text, you need to remember
+that there are several levels of @dfn{escape processing} going on.
+
+First, there is the @dfn{lexical} level, which is when @code{awk} reads
+your program, and builds an internal copy of your program that can
+be executed.
+
+Then there is the run-time level, when @code{awk} actually scans the
+replacement string to determine what to generate.
+
+At both levels, @code{awk} looks for a defined set of characters that
+can come after a backslash. At the lexical level, it looks for the
+escape sequences listed in @ref{Escape Sequences}.
+Thus, for every @samp{\} that @code{awk} will process at the run-time
+level, you type two @samp{\}s at the lexical level.
+When a character that is not valid for an escape sequence follows the
+@samp{\}, Unix @code{awk} and @code{gawk} both simply remove the initial
+@samp{\}, and put the following character into the string. Thus, for
+example, @code{"a\qb"} is treated as @code{"aqb"}.
+
+At the run-time level, the various functions handle sequences of
+@samp{\} and @samp{&} differently. The situation is (sadly) somewhat complex.
+
+Historically, the @code{sub} and @code{gsub} functions treated the two
+character sequence @samp{\&} specially; this sequence was replaced in
+the generated text with a single @samp{&}. Any other @samp{\} within
+the @var{replacement} string that did not precede an @samp{&} was passed
+through unchanged. To illustrate with a table:
+
+@c Thanks to Karl Berry for help with the TeX stuff.
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{sub} sees!@code{sub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+ @code{\&}! @code{&}!the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+ @code{\\\&}! @code{\&}!a literal @samp{&}@cr
+@code{\\\\&}! @code{\\&}!a literal @samp{\&}@cr
+@code{\\\\\&}! @code{\\&}!a literal @samp{\&}@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}@cr
+ @code{\\q}! @code{\q}!a literal @samp{\q}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{sub} sees @code{sub} generates
+ -------- ---------- ---------------
+ @code{\&} @code{&} the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\\&} @code{\&} a literal @samp{&}
+ @code{\\\\&} @code{\\&} a literal @samp{\&}
+ @code{\\\\\&} @code{\\&} a literal @samp{\&}
+@code{\\\\\\&} @code{\\\&} a literal @samp{\\&}
+ @code{\\q} @code{\q} a literal @samp{\q}
+@end display
+@end ifinfo
+
+@noindent
+This table shows both the lexical level processing, where
+an odd number of backslashes becomes an even number at the run time level,
+and the run-time processing done by @code{sub}.
+(For the sake of simplicity, the rest of the tables below only show the
+case of even numbers of @samp{\}s entered at the lexical level.)
+
+The problem with the historical approach is that there is no way to get
+a literal @samp{\} followed by the matched text.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+The 1992 POSIX standard attempted to fix this problem. The standard
+says that @code{sub} and @code{gsub} look for either a @samp{\} or an @samp{&}
+after the @samp{\}. If either one follows a @samp{\}, that character is
+output literally. The interpretation of @samp{\} and @samp{&} then becomes
+like this:
+
+@c thanks to Karl Berry for formatting this table
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{sub} sees!@code{sub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+ @code{&}! @code{&}!the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{sub} sees @code{sub} generates
+ -------- ---------- ---------------
+ @code{&} @code{&} the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
+@end display
+@end ifinfo
+
+@noindent
+This would appear to solve the problem.
+Unfortunately, the phrasing of the standard is unusual. It
+says, in effect, that @samp{\} turns off the special meaning of any
+following character, but that for anything other than @samp{\} and @samp{&},
+such special meaning is undefined. This wording leads to two problems.
+
+@enumerate
+@item
+Backslashes must now be doubled in the @var{replacement} string, breaking
+historical @code{awk} programs.
+
+@item
+To make sure that an @code{awk} program is portable, @emph{every} character
+in the @var{replacement} string must be preceded with a
+backslash.@footnote{This consequence was certainly unintended.}
+@c I can say that, 'cause I was involved in making this change
+@end enumerate
+
+The POSIX standard is under revision.@footnote{As of @value{UPDATE-MONTH},
+with final approval and publication hopefully sometime in 1997.}
+Because of the above problems, proposed text for the revised standard
+reverts to rules that correspond more closely to the original existing
+practice. The proposed rules have special cases that make it possible
+to produce a @samp{\} preceding the matched text.
+
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{sub} sees!@code{sub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr
+@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+ @code{\\q}! @code{\q}!a literal @samp{\q}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{sub} sees @code{sub} generates
+ -------- ---------- ---------------
+@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
+ @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\q} @code{\q} a literal @samp{\q}
+@end display
+@end ifinfo
+
+In a nutshell, at the run-time level, there are now three special sequences
+of characters, @samp{\\\&}, @samp{\\&} and @samp{\&}, whereas historically,
+there was only one. However, as in the historical case, any @samp{\} that
+is not part of one of these three sequences is not special, and appears
+in the output literally.
+
+@code{gawk} 3.0 follows these proposed POSIX rules for @code{sub} and
+@code{gsub}.
+@c As much as we think it's a lousy idea. You win some, you lose some. Sigh.
+Whether these proposed rules will actually become codified into the
+standard is unknown at this point. Subsequent @code{gawk} releases will
+track the standard and implement whatever the final version specifies;
+this @value{DOCUMENT} will be updated as well.
+
+The rules for @code{gensub} are considerably simpler. At the run-time
+level, whenever @code{gawk} sees a @samp{\}, if the following character
+is a digit, then the text that matched the corresponding parenthesized
+subexpression is placed in the generated output. Otherwise,
+no matter what the character after the @samp{\} is, that character will
+appear in the generated text, and the @samp{\} will not.
+
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{gensub} sees!@code{gensub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+ @code{&}! @code{&}!the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+ @code{\\\\}! @code{\\}!a literal @samp{\}@cr
+ @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr
+ @code{\\q}! @code{\q}!a literal @samp{q}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{gensub} sees @code{gensub} generates
+ -------- ------------- ------------------
+ @code{&} @code{&} the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\\\} @code{\\} a literal @samp{\}
+ @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
+ @code{\\q} @code{\q} a literal @samp{q}
+@end display
+@end ifinfo
+
+Because of the complexity of the lexical and run-time level processing,
+and the special cases for @code{sub} and @code{gsub},
+we recommend the use of @code{gawk} and @code{gensub} whenever you have
+to do substitutions.
+
+@node I/O Functions, Time Functions, String Functions, Built-in
+@section Built-in Functions for Input/Output
+
+The following functions are related to Input/Output (I/O).
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item close(@var{filename})
+@findex close
+Close the file @var{filename}, for input or output. The argument may
+alternatively be a shell command that was used for redirecting to or
+from a pipe; then the pipe is closed.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes},
+for more information.
+
+@item fflush(@r{[}@var{filename}@r{]})
+@findex fflush
+@cindex portability issues
+@cindex flushing buffers
+@cindex buffers, flushing
+@cindex buffering output
+@cindex output, buffering
+Flush any buffered output associated with @var{filename}, which is either a
+file opened for writing, or a shell command for redirecting output to
+a pipe.
+
+Many utility programs will @dfn{buffer} their output; they save information
+to be written to a disk file or terminal in memory, until there is enough
+for it to be worthwhile to send the data to the output device.
+This is often more efficient than writing
+every little bit of information as soon as it is ready. However, sometimes
+it is necessary to force a program to @dfn{flush} its buffers; that is,
+write the information to its destination, even if a buffer is not full.
+This is the purpose of the @code{fflush} function; @code{gawk} too
+buffers its output, and the @code{fflush} function can be used to force
+@code{gawk} to flush its buffers.
+
+@code{fflush} is a recent (1994) addition to the Bell Labs research
+version of @code{awk}; it is not part of the POSIX standard, and will
+not be available if @samp{--posix} has been specified on the command
+line (@pxref{Options, ,Command Line Options}).
+
+@code{gawk} extends the @code{fflush} function in two ways. The first
+is to allow no argument at all. In this case, the buffer for the
+standard output is flushed. The second way is to allow the null string
+(@w{@code{""}}) as the argument. In this case, the buffers for
+@emph{all} open output files and pipes are flushed.
+
+@code{fflush} returns zero if the buffer was successfully flushed,
+and nonzero otherwise.
+
+@item system(@var{command})
+@findex system
+@cindex interaction, @code{awk} and other programs
+The @code{system} function allows the user to execute operating system commands
+and then return to the @code{awk} program. The @code{system} function
+executes the command given by the string @var{command}. It returns, as
+its value, the status returned by the command that was executed.
+
+For example, if the following fragment of code is put in your @code{awk}
+program:
+
+@example
+END @{
+ system("date | mail -s 'awk run done' root")
+@}
+@end example
+
+@noindent
+the system administrator will be sent mail when the @code{awk} program
+finishes processing input and begins its end-of-input processing.
+
+Note that redirecting @code{print} or @code{printf} into a pipe is often
+enough to accomplish your task. However, if your @code{awk}
+program is interactive, @code{system} is useful for cranking up large
+self-contained programs, such as a shell or an editor.
+
+Some operating systems cannot implement the @code{system} function.
+@code{system} causes a fatal error if it is not supported.
+@end table
+
+@c fakenode --- for prepinfo
+@subheading Interactive vs. Non-Interactive Buffering
+@cindex buffering, interactive vs. non-interactive
+@cindex buffering, non-interactive vs. interactive
+@cindex interactive buffering vs. non-interactive
+@cindex non-interactive buffering vs. interactive
+
+As a side point, buffering issues can be even more confusing depending
+upon whether or not your program is @dfn{interactive}, i.e., communicating
+with a user sitting at a keyboard.@footnote{A program is interactive
+if the standard output is connected
+to a terminal device.}
+
+Interactive programs generally @dfn{line buffer} their output; they
+write out every line. Non-interactive programs wait until they have
+a full buffer, which may be many lines of output.
+
+@c Thanks to Walter.Mecky@dresdnerbank.de for this example, and for
+@c motivating me to write this section.
+Here is an example of the difference.
+
+@example
+$ awk '@{ print $1 + $2 @}'
+1 1
+@print{} 2
+2 3
+@print{} 5
+@kbd{Control-d}
+@end example
+
+@noindent
+Each line of output is printed immediately. Compare that behavior
+with this example.
+
+@example
+$ awk '@{ print $1 + $2 @}' | cat
+1 1
+2 3
+@kbd{Control-d}
+@print{} 2
+@print{} 5
+@end example
+
+@noindent
+Here, no output is printed until after the @kbd{Control-d} is typed, since
+it is all buffered, and sent down the pipe to @code{cat} in one shot.
+
+@c fakenode --- for prepinfo
+@subheading Controlling Output Buffering with @code{system}
+@cindex flushing buffers
+@cindex buffers, flushing
+@cindex buffering output
+@cindex output, buffering
+
+The @code{fflush} function provides explicit control over output buffering for
+individual files and pipes. However, its use is not portable to many other
+@code{awk} implementations. An alternative method to flush output
+buffers is by calling @code{system} with a null string as its argument:
+
+@example
+system("") # flush output
+@end example
+
+@noindent
+@code{gawk} treats this use of the @code{system} function as a special
+case, and is smart enough not to run a shell (or other command
+interpreter) with the empty command. Therefore, with @code{gawk}, this
+idiom is not only useful, it is efficient. While this method should work
+with other @code{awk} implementations, it will not necessarily avoid
+starting an unnecessary shell. (Other implementations may only
+flush the buffer associated with the standard output, and not necessarily
+all buffered output.)
+
+If you think about what a programmer expects, it makes sense that
+@code{system} should flush any pending output. The following program:
+
+@example
+BEGIN @{
+ print "first print"
+ system("echo system echo")
+ print "second print"
+@}
+@end example
+
+@noindent
+must print
+
+@example
+first print
+system echo
+second print
+@end example
+
+@noindent
+and not
+
+@example
+system echo
+first print
+second print
+@end example
+
+If @code{awk} did not flush its buffers before calling @code{system}, the
+latter (undesirable) output is what you would see.
+
+@node Time Functions, , I/O Functions, Built-in
+@section Functions for Dealing with Time Stamps
+
+@cindex timestamps
+@cindex time of day
+A common use for @code{awk} programs is the processing of log files
+containing time stamp information, indicating when a
+particular log record was written. Many programs log their time stamp
+in the form returned by the @code{time} system call, which is the
+number of seconds since a particular epoch. On POSIX systems,
+it is the number of seconds since Midnight, January 1, 1970, UTC.
+
+In order to make it easier to process such log files, and to produce
+useful reports, @code{gawk} provides two functions for working with time
+stamps. Both of these are @code{gawk} extensions; they are not specified
+in the POSIX standard, nor are they in any other known version
+of @code{awk}.
+
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item systime()
+@findex systime
+This function returns the current time as the number of seconds since
+the system epoch. On POSIX systems, this is the number of seconds
+since Midnight, January 1, 1970, UTC. It may be a different number on
+other systems.
+
+@item strftime(@r{[}@var{format} @r{[}, @var{timestamp}@r{]]})
+@findex strftime
+This function returns a string. It is similar to the function of the
+same name in ANSI C. The time specified by @var{timestamp} is used to
+produce a string, based on the contents of the @var{format} string.
+The @var{timestamp} is in the same format as the value returned by the
+@code{systime} function. If no @var{timestamp} argument is supplied,
+@code{gawk} will use the current time of day as the time stamp.
+If no @var{format} argument is supplied, @code{strftime} uses
+@code{@w{"%a %b %d %H:%M:%S %Z %Y"}}. This format string produces
+output (almost) equivalent to that of the @code{date} utility.
+(Versions of @code{gawk} prior to 3.0 require the @var{format} argument.)
+@end table
+
+The @code{systime} function allows you to compare a time stamp from a
+log file with the current time of day. In particular, it is easy to
+determine how long ago a particular record was logged. It also allows
+you to produce log records using the ``seconds since the epoch'' format.
+
+The @code{strftime} function allows you to easily turn a time stamp
+into human-readable information. It is similar in nature to the @code{sprintf}
+function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+in that it copies non-format specification characters verbatim to the
+returned string, while substituting date and time values for format
+specifications in the @var{format} string.
+
+@code{strftime} is guaranteed by the ANSI C standard to support
+the following date format specifications:
+
+@table @code
+@item %a
+The locale's abbreviated weekday name.
+
+@item %A
+The locale's full weekday name.
+
+@item %b
+The locale's abbreviated month name.
+
+@item %B
+The locale's full month name.
+
+@item %c
+The locale's ``appropriate'' date and time representation.
+
+@item %d
+The day of the month as a decimal number (01--31).
+
+@item %H
+The hour (24-hour clock) as a decimal number (00--23).
+
+@item %I
+The hour (12-hour clock) as a decimal number (01--12).
+
+@item %j
+The day of the year as a decimal number (001--366).
+
+@item %m
+The month as a decimal number (01--12).
+
+@item %M
+The minute as a decimal number (00--59).
+
+@item %p
+The locale's equivalent of the AM/PM designations associated
+with a 12-hour clock.
+
+@item %S
+The second as a decimal number (00--60).@footnote{Occasionally there are
+minutes in a year with a leap second, which is why the
+seconds can go up to 60.}
+
+@item %U
+The week number of the year (the first Sunday as the first day of week one)
+as a decimal number (00--53).
+
+@item %w
+The weekday as a decimal number (0--6). Sunday is day zero.
+
+@item %W
+The week number of the year (the first Monday as the first day of week one)
+as a decimal number (00--53).
+
+@item %x
+The locale's ``appropriate'' date representation.
+
+@item %X
+The locale's ``appropriate'' time representation.
+
+@item %y
+The year without century as a decimal number (00--99).
+
+@item %Y
+The year with century as a decimal number (e.g., 1995).
+
+@item %Z
+The time zone name or abbreviation, or no characters if
+no time zone is determinable.
+
+@item %%
+A literal @samp{%}.
+@end table
+
+If a conversion specifier is not one of the above, the behavior is
+undefined.@footnote{This is because ANSI C leaves the
+behavior of the C version of @code{strftime} undefined, and @code{gawk}
+will use the system's version of @code{strftime} if it's there.
+Typically, the conversion specifier will either not appear in the
+returned string, or it will appear literally.}
+
+@cindex locale, definition of
+Informally, a @dfn{locale} is the geographic place in which a program
+is meant to run. For example, a common way to abbreviate the date
+September 4, 1991 in the United States would be ``9/4/91''.
+In many countries in Europe, however, it would be abbreviated ``4.9.91''.
+Thus, the @samp{%x} specification in a @code{"US"} locale might produce
+@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce
+@samp{4.9.91}. The ANSI C standard defines a default @code{"C"}
+locale, which is an environment that is typical of what most C programmers
+are used to.
+
+A public-domain C version of @code{strftime} is supplied with @code{gawk}
+for systems that are not yet fully ANSI-compliant. If that version is
+used to compile @code{gawk} (@pxref{Installation, ,Installing @code{gawk}}),
+then the following additional format specifications are available:
+
+@table @code
+@item %D
+Equivalent to specifying @samp{%m/%d/%y}.
+
+@item %e
+The day of the month, padded with a space if it is only one digit.
+
+@item %h
+Equivalent to @samp{%b}, above.
+
+@item %n
+A newline character (ASCII LF).
+
+@item %r
+Equivalent to specifying @samp{%I:%M:%S %p}.
+
+@item %R
+Equivalent to specifying @samp{%H:%M}.
+
+@item %T
+Equivalent to specifying @samp{%H:%M:%S}.
+
+@item %t
+A tab character.
+
+@item %k
+The hour (24-hour clock) as a decimal number (0--23).
+Single digit numbers are padded with a space.
+
+@item %l
+The hour (12-hour clock) as a decimal number (1--12).
+Single digit numbers are padded with a space.
+
+@item %C
+The century, as a number between 00 and 99.
+
+@item %u
+The weekday as a decimal number
+[1 (Monday)--7].
+
+@cindex ISO 8601
+@item %V
+The week number of the year (the first Monday as the first
+day of week one) as a decimal number (01--53).
+The method for determining the week number is as specified by ISO 8601
+(to wit: if the week containing January 1 has four or more days in the
+new year, then it is week one, otherwise it is week 53 of the previous year
+and the next week is week one).
+
+@item %G
+The year with century of the ISO week number, as a decimal number.
+
+For example, January 1, 1993, is in week 53 of 1992. Thus, the year
+of its ISO week number is 1992, even though its year is 1993.
+Similarly, December 31, 1973, is in week 1 of 1974. Thus, the year
+of its ISO week number is 1974, even though its year is 1973.
+
+@item %g
+The year without century of the ISO week number, as a decimal number (00--99).
+
+@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI
+@itemx %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
+These are ``alternate representations'' for the specifications
+that use only the second letter (@samp{%c}, @samp{%C}, and so on).
+They are recognized, but their normal representations are
+used.@footnote{If you don't understand any of this, don't worry about
+it; these facilities are meant to make it easier to ``internationalize''
+programs.}
+(These facilitate compliance with the POSIX @code{date} utility.)
+
+@item %v
+The date in VMS format (e.g., 20-JUN-1991).
+
+@cindex RFC-822
+@cindex RFC-1036
+@item %z
+The timezone offset in a +HHMM format (e.g., the format necessary to
+produce RFC-822/RFC-1036 date headers).
+@end table
+
+This example is an @code{awk} implementation of the POSIX
+@code{date} utility. Normally, the @code{date} utility prints the
+current date and time of day in a well known format. However, if you
+provide an argument to it that begins with a @samp{+}, @code{date}
+will copy non-format specifier characters to the standard output, and
+will interpret the current time according to the format specifiers in
+the string. For example:
+
+@example
+$ date '+Today is %A, %B %d, %Y.'
+@print{} Today is Thursday, July 11, 1991.
+@end example
+
+Here is the @code{gawk} version of the @code{date} utility.
+It has a shell ``wrapper'', to handle the @samp{-u} option,
+which requires that @code{date} run as if the time zone
+were set to UTC.
+
+@example
+@group
+#! /bin/sh
+#
+# date --- approximate the P1003.2 'date' command
+
+case $1 in
+-u) TZ=GMT0 # use UTC
+ export TZ
+ shift ;;
+esac
+@end group
+
+@group
+gawk 'BEGIN @{
+ format = "%a %b %d %H:%M:%S %Z %Y"
+ exitval = 0
+@end group
+
+@group
+ if (ARGC > 2)
+ exitval = 1
+ else if (ARGC == 2) @{
+ format = ARGV[1]
+ if (format ~ /^\+/)
+ format = substr(format, 2) # remove leading +
+ @}
+ print strftime(format)
+ exit exitval
+@}' "$@@"
+@end group
+@end example
+
+@node User-defined, Invoking Gawk, Built-in, Top
+@chapter User-defined Functions
+
+@cindex user-defined functions
+@cindex functions, user-defined
+Complicated @code{awk} programs can often be simplified by defining
+your own functions. User-defined functions can be called just like
+built-in ones (@pxref{Function Calls}), but it is up to you to define
+them---to tell @code{awk} what they should do.
+
+@menu
+* Definition Syntax:: How to write definitions and what they mean.
+* Function Example:: An example function definition and what it
+ does.
+* Function Caveats:: Things to watch out for.
+* Return Statement:: Specifying the value a function returns.
+@end menu
+
+@node Definition Syntax, Function Example, User-defined, User-defined
+@section Function Definition Syntax
+@cindex defining functions
+@cindex function definition
+
+Definitions of functions can appear anywhere between the rules of an
+@code{awk} program. Thus, the general form of an @code{awk} program is
+extended to include sequences of rules @emph{and} user-defined function
+definitions.
+There is no need in @code{awk} to put the definition of a function
+before all uses of the function. This is because @code{awk} reads the
+entire program before starting to execute any of it.
+
+The definition of a function named @var{name} looks like this:
+
+@example
+function @var{name}(@var{parameter-list})
+@{
+ @var{body-of-function}
+@}
+@end example
+
+@cindex names, use of
+@cindex namespaces
+@noindent
+@var{name} is the name of the function to be defined. A valid function
+name is like a valid variable name: a sequence of letters, digits and
+underscores, not starting with a digit.
+Within a single @code{awk} program, any particular name can only be
+used as a variable, array or function.
+
+@var{parameter-list} is a list of the function's arguments and local
+variable names, separated by commas. When the function is called,
+the argument names are used to hold the argument values given in
+the call. The local variables are initialized to the empty string.
+A function cannot have two parameters with the same name.
+
+The @var{body-of-function} consists of @code{awk} statements. It is the
+most important part of the definition, because it says what the function
+should actually @emph{do}. The argument names exist to give the body a
+way to talk about the arguments; local variables, to give the body
+places to keep temporary values.
+
+Argument names are not distinguished syntactically from local variable
+names; instead, the number of arguments supplied when the function is
+called determines how many argument variables there are. Thus, if three
+argument values are given, the first three names in @var{parameter-list}
+are arguments, and the rest are local variables.
+
+It follows that if the number of arguments is not the same in all calls
+to the function, some of the names in @var{parameter-list} may be
+arguments on some occasions and local variables on others. Another
+way to think of this is that omitted arguments default to the
+null string.
+
+Usually when you write a function you know how many names you intend to
+use for arguments and how many you intend to use as local variables. It is
+conventional to place some extra space between the arguments and
+the local variables, to document how your function is supposed to be used.
+
+@cindex variable shadowing
+During execution of the function body, the arguments and local variable
+values hide or @dfn{shadow} any variables of the same names used in the
+rest of the program. The shadowed variables are not accessible in the
+function definition, because there is no way to name them while their
+names have been taken away for the local variables. All other variables
+used in the @code{awk} program can be referenced or set normally in the
+function's body.
+
+The arguments and local variables last only as long as the function body
+is executing. Once the body finishes, you can once again access the
+variables that were shadowed while the function was running.
+
+@cindex recursive function
+@cindex function, recursive
+The function body can contain expressions which call functions. They
+can even call this function, either directly or by way of another
+function. When this happens, we say the function is @dfn{recursive}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+In many @code{awk} implementations, including @code{gawk},
+the keyword @code{function} may be
+abbreviated @code{func}. However, POSIX only specifies the use of
+the keyword @code{function}. This actually has some practical implications.
+If @code{gawk} is in POSIX-compatibility mode
+(@pxref{Options, ,Command Line Options}), then the following
+statement will @emph{not} define a function:
+
+@example
+func foo() @{ a = sqrt($1) ; print a @}
+@end example
+
+@noindent
+Instead it defines a rule that, for each record, concatenates the value
+of the variable @samp{func} with the return value of the function @samp{foo}.
+If the resulting string is non-null, the action is executed.
+This is probably not what was desired. (@code{awk} accepts this input as
+syntactically valid, since functions may be used before they are defined
+in @code{awk} programs.)
+
+@cindex portability issues
+To ensure that your @code{awk} programs are portable, always use the
+keyword @code{function} when defining a function.
+
+@node Function Example, Function Caveats, Definition Syntax, User-defined
+@section Function Definition Examples
+
+Here is an example of a user-defined function, called @code{myprint}, that
+takes a number and prints it in a specific format.
+
+@example
+function myprint(num)
+@{
+ printf "%6.3g\n", num
+@}
+@end example
+
+@noindent
+To illustrate, here is an @code{awk} rule which uses our @code{myprint}
+function:
+
+@example
+$3 > 0 @{ myprint($3) @}
+@end example
+
+@noindent
+This program prints, in our special format, all the third fields that
+contain a positive number in our input. Therefore, when given:
+
+@example
+@group
+ 1.2 3.4 5.6 7.8
+ 9.10 11.12 -13.14 15.16
+17.18 19.20 21.22 23.24
+@end group
+@end example
+
+@noindent
+this program, using our function to format the results, prints:
+
+@example
+ 5.6
+ 21.2
+@end example
+
+This function deletes all the elements in an array.
+
+@example
+function delarray(a, i)
+@{
+ for (i in a)
+ delete a[i]
+@}
+@end example
+
+When working with arrays, it is often necessary to delete all the elements
+in an array and start over with a new list of elements
+(@pxref{Delete, ,The @code{delete} Statement}).
+Instead of having
+to repeat this loop everywhere in your program that you need to clear out
+an array, your program can just call @code{delarray}.
+
+Here is an example of a recursive function. It takes a string
+as an input parameter, and returns the string in backwards order.
+
+@example
+function rev(str, start)
+@{
+ if (start == 0)
+ return ""
+
+ return (substr(str, start, 1) rev(str, start - 1))
+@}
+@end example
+
+If this function is in a file named @file{rev.awk}, we can test it
+this way:
+
+@example
+$ echo "Don't Panic!" |
+> gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk
+@print{} !cinaP t'noD
+@end example
+
+Here is an example that uses the built-in function @code{strftime}.
+(@xref{Time Functions, ,Functions for Dealing with Time Stamps},
+for more information on @code{strftime}.)
+The C @code{ctime} function takes a timestamp and returns it in a string,
+formatted in a well known fashion. Here is an @code{awk} version:
+
+@example
+@c file eg/lib/ctime.awk
+@group
+# ctime.awk
+#
+# awk version of C ctime(3) function
+
+function ctime(ts, format)
+@{
+ format = "%a %b %d %H:%M:%S %Z %Y"
+ if (ts == 0)
+ ts = systime() # use current time as default
+ return strftime(format, ts)
+@}
+@c endfile
+@end group
+@end example
+
+@node Function Caveats, Return Statement, Function Example, User-defined
+@section Calling User-defined Functions
+
+@cindex call by value
+@cindex call by reference
+@cindex calling a function
+@cindex function call
+@dfn{Calling a function} means causing the function to run and do its job.
+A function call is an expression, and its value is the value returned by
+the function.
+
+A function call consists of the function name followed by the arguments
+in parentheses. What you write in the call for the arguments are
+@code{awk} expressions; each time the call is executed, these
+expressions are evaluated, and the values are the actual arguments. For
+example, here is a call to @code{foo} with three arguments (the first
+being a string concatenation):
+
+@example
+foo(x y, "lose", 4 * z)
+@end example
+
+@strong{Caution:} whitespace characters (spaces and tabs) are not allowed
+between the function name and the open-parenthesis of the argument list.
+If you write whitespace by mistake, @code{awk} might think that you mean
+to concatenate a variable with an expression in parentheses. However, it
+notices that you used a function name and not a variable name, and reports
+an error.
+
+@cindex call by value
+When a function is called, it is given a @emph{copy} of the values of
+its arguments. This is known as @dfn{call by value}. The caller may use
+a variable as the expression for the argument, but the called function
+does not know this: it only knows what value the argument had. For
+example, if you write this code:
+
+@example
+foo = "bar"
+z = myfunc(foo)
+@end example
+
+@noindent
+then you should not think of the argument to @code{myfunc} as being
+``the variable @code{foo}.'' Instead, think of the argument as the
+string value, @code{"bar"}.
+
+If the function @code{myfunc} alters the values of its local variables,
+this has no effect on any other variables. Thus, if @code{myfunc}
+does this:
+
+@example
+@group
+function myfunc(str)
+@{
+ print str
+ str = "zzz"
+ print str
+@}
+@end group
+@end example
+
+@noindent
+to change its first argument variable @code{str}, this @emph{does not}
+change the value of @code{foo} in the caller. The role of @code{foo} in
+calling @code{myfunc} ended when its value, @code{"bar"}, was computed.
+If @code{str} also exists outside of @code{myfunc}, the function body
+cannot alter this outer value, because it is shadowed during the
+execution of @code{myfunc} and cannot be seen or changed from there.
+
+@cindex call by reference
+However, when arrays are the parameters to functions, they are @emph{not}
+copied. Instead, the array itself is made available for direct manipulation
+by the function. This is usually called @dfn{call by reference}.
+Changes made to an array parameter inside the body of a function @emph{are}
+visible outside that function.
+@ifinfo
+This can be @strong{very} dangerous if you do not watch what you are
+doing. For example:
+@end ifinfo
+@iftex
+@emph{This can be very dangerous if you do not watch what you are
+doing.} For example:
+@end iftex
+
+@example
+function changeit(array, ind, nvalue)
+@{
+ array[ind] = nvalue
+@}
+
+BEGIN @{
+ a[1] = 1; a[2] = 2; a[3] = 3
+ changeit(a, 2, "two")
+ printf "a[1] = %s, a[2] = %s, a[3] = %s\n",
+ a[1], a[2], a[3]
+@}
+@end example
+
+@noindent
+This program prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
+@code{changeit} stores @code{"two"} in the second element of @code{a}.
+
+@cindex undefined functions
+@cindex functions, undefined
+Some @code{awk} implementations allow you to call a function that
+has not been defined, and only report a problem at run-time when the
+program actually tries to call the function. For example:
+
+@example
+@group
+BEGIN @{
+ if (0)
+ foo()
+ else
+ bar()
+@}
+function bar() @{ @dots{} @}
+# note that `foo' is not defined
+@end group
+@end example
+
+@noindent
+Since the @samp{if} statement will never be true, it is not really a
+problem that @code{foo} has not been defined. Usually though, it is a
+problem if a program calls an undefined function.
+
+@ignore
+At one point, I had gawk dying on this, but later decided that this might
+break old programs and/or test suites.
+@end ignore
+
+If @samp{--lint} has been specified
+(@pxref{Options, ,Command Line Options}),
+@code{gawk} will report about calls to undefined functions.
+
+Some @code{awk} implementations generate a run-time
+error if you use the @code{next} statement
+(@pxref{Next Statement, , The @code{next} Statement})
+inside a user-defined function.
+@code{gawk} does not have this problem.
+
+@node Return Statement, , Function Caveats, User-defined
+@section The @code{return} Statement
+@cindex @code{return} statement
+
+The body of a user-defined function can contain a @code{return} statement.
+This statement returns control to the rest of the @code{awk} program. It
+can also be used to return a value for use in the rest of the @code{awk}
+program. It looks like this:
+
+@example
+return @r{[}@var{expression}@r{]}
+@end example
+
+The @var{expression} part is optional. If it is omitted, then the returned
+value is undefined and, therefore, unpredictable.
+
+A @code{return} statement with no value expression is assumed at the end of
+every function definition. So if control reaches the end of the function
+body, then the function returns an unpredictable value. @code{awk}
+will @emph{not} warn you if you use the return value of such a function.
+
+Sometimes, you want to write a function for what it does, not for
+what it returns. Such a function corresponds to a @code{void} function
+in C or to a @code{procedure} in Pascal. Thus, it may be appropriate to not
+return any value; you should simply bear in mind that if you use the return
+value of such a function, you do so at your own risk.
+
+Here is an example of a user-defined function that returns a value
+for the largest number among the elements of an array:
+
+@example
+@group
+function maxelt(vec, i, ret)
+@{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+@end group
+@end example
+
+@noindent
+You call @code{maxelt} with one argument, which is an array name. The local
+variables @code{i} and @code{ret} are not intended to be arguments;
+while there is nothing to stop you from passing two or three arguments
+to @code{maxelt}, the results would be strange. The extra space before
+@code{i} in the function parameter list indicates that @code{i} and
+@code{ret} are not supposed to be arguments. This is a convention that
+you should follow when you define functions.
+
+Here is a program that uses our @code{maxelt} function. It loads an
+array, calls @code{maxelt}, and then reports the maximum number in that
+array:
+
+@example
+@group
+awk '
+function maxelt(vec, i, ret)
+@{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+@end group
+
+@group
+# Load all fields of each record into nums.
+@{
+ for(i = 1; i <= NF; i++)
+ nums[NR, i] = $i
+@}
+
+END @{
+ print maxelt(nums)
+@}'
+@end group
+@end example
+
+Given the following input:
+
+@example
+@group
+ 1 5 23 8 16
+44 3 5 2 8 26
+256 291 1396 2962 100
+-6 467 998 1101
+99385 11 0 225
+@end group
+@end example
+
+@noindent
+our program tells us (predictably) that @code{99385} is the largest number
+in our array.
+
+@node Invoking Gawk, Library Functions, User-defined, Top
+@chapter Running @code{awk}
+@cindex command line
+@cindex invocation of @code{gawk}
+@cindex arguments, command line
+@cindex options, command line
+@cindex long options
+@cindex options, long
+
+There are two ways to run @code{awk}: with an explicit program, or with
+one or more program files. Here are templates for both of them; items
+enclosed in @samp{@r{[}@dots{}@r{]}} in these templates are optional.
+
+Besides traditional one-letter POSIX-style options, @code{gawk} also
+supports GNU long options.
+
+@example
+awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
+awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+@cindex empty program
+@cindex dark corner
+It is possible to invoke @code{awk} with an empty program:
+
+@example
+$ awk '' datafile1 datafile2
+@end example
+
+@noindent
+Doing so makes little sense though; @code{awk} will simply exit
+silently when given an empty program (d.c.). If @samp{--lint} has
+been specified on the command line, @code{gawk} will issue a
+warning that the program is empty.
+
+@menu
+* Options:: Command line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+* Known Bugs:: Known Bugs in @code{gawk}.
+@end menu
+
+@node Options, Other Arguments, Invoking Gawk, Invoking Gawk
+@section Command Line Options
+
+Options begin with a dash, and consist of a single character.
+GNU style long options consist of two dashes and a keyword.
+The keyword can be abbreviated, as long as the abbreviation allows the option
+to be uniquely identified. If the option takes an argument, then the
+keyword is either immediately followed by an equals sign (@samp{=}) and the
+argument's value, or the keyword and the argument's value are separated
+by whitespace. For brevity, the discussion below only refers to the
+traditional short options; however the long and short options are
+interchangeable in all contexts.
+
+Each long option for @code{gawk} has a corresponding
+POSIX-style option. The options and their meanings are as follows:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator @var{fs}
+@cindex @code{-F} option
+@cindex @code{--field-separator} option
+Sets the @code{FS} variable to @var{fs}
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@item -f @var{source-file}
+@itemx --file @var{source-file}
+@cindex @code{-f} option
+@cindex @code{--file} option
+Indicates that the @code{awk} program is to be found in @var{source-file}
+instead of in the first non-option argument.
+
+@item -v @var{var}=@var{val}
+@itemx --assign @var{var}=@var{val}
+@cindex @code{-v} option
+@cindex @code{--assign} option
+Sets the variable @var{var} to the value @var{val} @strong{before}
+execution of the program begins. Such variable values are available
+inside the @code{BEGIN} rule
+(@pxref{Other Arguments, ,Other Command Line Arguments}).
+
+The @samp{-v} option can only set one variable, but you can use
+it more than once, setting another variable each time, like this:
+@samp{awk @w{-v foo=1} @w{-v bar=2} @dots{}}.
+
+@item -mf @var{NNN}
+@itemx -mr @var{NNN}
+Set various memory limits to the value @var{NNN}. The @samp{f} flag sets
+the maximum number of fields, and the @samp{r} flag sets the maximum
+record size. These two flags and the @samp{-m} option are from the
+Bell Labs research version of Unix @code{awk}. They are provided
+for compatibility, but otherwise ignored by
+@code{gawk}, since @code{gawk} has no predefined limits.
+
+@item -W @var{gawk-opt}
+@cindex @code{-W} option
+Following the POSIX standard, options that are implementation
+specific are supplied as arguments to the @samp{-W} option. These options
+also have corresponding GNU style long options.
+See below.
+
+@item --
+Signals the end of the command line options. The following arguments
+are not treated as options even if they begin with @samp{-}. This
+interpretation of @samp{--} follows the POSIX argument parsing
+conventions.
+
+This is useful if you have file names that start with @samp{-},
+or in shell scripts, if you have file names that will be specified
+by the user which could start with @samp{-}.
+@end table
+
+The following @code{gawk}-specific options are available:
+
+@table @code
+@item -W traditional
+@itemx -W compat
+@itemx --traditional
+@itemx --compat
+@cindex @code{--compat} option
+@cindex @code{--traditional} option
+@cindex compatibility mode
+Specifies @dfn{compatibility mode}, in which the GNU extensions to
+the @code{awk} language are disabled, so that @code{gawk} behaves just
+like the Bell Labs research version of Unix @code{awk}.
+@samp{--traditional} is the preferred form of this option.
+@xref{POSIX/GNU, ,Extensions in @code{gawk} Not in POSIX @code{awk}},
+which summarizes the extensions. Also see
+@ref{Compatibility Mode, ,Downward Compatibility and Debugging}.
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+@cindex @code{--copyleft} option
+@cindex @code{--copyright} option
+Print the short version of the General Public License, and then exit.
+This option may disappear in a future version of @code{gawk}.
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+@cindex @code{--help} option
+@cindex @code{--usage} option
+Print a ``usage'' message summarizing the short and long style options
+that @code{gawk} accepts, and then exit.
+
+@item -W lint
+@itemx --lint
+@cindex @code{--lint} option
+Warn about constructs that are dubious or non-portable to
+other @code{awk} implementations.
+Some warnings are issued when @code{gawk} first reads your program. Others
+are issued at run-time, as your program executes.
+
+@item -W lint-old
+@itemx --lint-old
+@cindex @code{--lint-old} option
+Warn about constructs that are not available in
+the original Version 7 Unix version of @code{awk}
+(@pxref{V7/SVR3.1, , Major Changes between V7 and SVR3.1}).
+
+@item -W posix
+@itemx --posix
+@cindex @code{--posix} option
+@cindex POSIX mode
+Operate in strict POSIX mode. This disables all @code{gawk}
+extensions (just like @samp{--traditional}), and adds the following additional
+restrictions:
+
+@c IMPORTANT! Keep this list in sync with the one in node POSIX
+
+@itemize @bullet
+@item
+@code{\x} escape sequences are not recognized
+(@pxref{Escape Sequences}).
+
+@item
+Newlines do not act as whitespace to separate fields when @code{FS} is
+equal to a single space.
+
+@item
+The synonym @code{func} for the keyword @code{function} is not
+recognized (@pxref{Definition Syntax, ,Function Definition Syntax}).
+
+@item
+The operators @samp{**} and @samp{**=} cannot be used in
+place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
+and also @pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+Specifying @samp{-Ft} on the command line does not set the value
+of @code{FS} to be a single tab character
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@item
+The @code{fflush} built-in function is not supported
+(@pxref{I/O Functions, , Built-in Functions for Input/Output}).
+@end itemize
+
+If you supply both @samp{--traditional} and @samp{--posix} on the
+command line, @samp{--posix} will take precedence. @code{gawk}
+will also issue a warning if both options are supplied.
+
+@item -W re-interval
+@itemx --re-interval
+Allow interval expressions
+(@pxref{Regexp Operators, , Regular Expression Operators})
+in regexps.
+Because interval expressions were traditionally not available in @code{awk},
+@code{gawk} does not provide them by default. This prevents old @code{awk}
+programs from breaking.
+
+@item -W source @var{program-text}
+@itemx --source @var{program-text}
+@cindex @code{--source} option
+Program source code is taken from the @var{program-text}. This option
+allows you to mix source code in files with source
+code that you enter on the command line. This is particularly useful
+when you have library functions that you wish to use from your command line
+programs (@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+
+@item -W version
+@itemx --version
+@cindex @code{--version} option
+Prints version information for this particular copy of @code{gawk}.
+This allows you to determine if your copy of @code{gawk} is up to date
+with respect to whatever the Free Software Foundation is currently
+distributing.
+It is also useful for bug reports
+(@pxref{Bugs, , Reporting Problems and Bugs}).
+@end table
+
+Any other options are flagged as invalid with a warning message, but
+are otherwise ignored.
+
+In compatibility mode, as a special case, if the value of @var{fs} supplied
+to the @samp{-F} option is @samp{t}, then @code{FS} is set to the tab
+character (@code{"\t"}). This is only true for @samp{--traditional}, and not
+for @samp{--posix}
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+The @samp{-f} option may be used more than once on the command line.
+If it is, @code{awk} reads its program source from all of the named files, as
+if they had been concatenated together into one big file. This is
+useful for creating libraries of @code{awk} functions. Useful functions
+can be written once, and then retrieved from a standard place, instead
+of having to be included into each individual program.
+
+You can type in a program at the terminal and still use library functions,
+by specifying @samp{-f /dev/tty}. @code{awk} will read a file from the terminal
+to use as part of the @code{awk} program. After typing your program,
+type @kbd{Control-d} (the end-of-file character) to terminate it.
+(You may also use @samp{-f -} to read program source from the standard
+input, but then you will not be able to also use the standard input as a
+source of data.)
+
+Because it is clumsy using the standard @code{awk} mechanisms to mix source
+file and command line @code{awk} programs, @code{gawk} provides the
+@samp{--source} option. This does not require you to pre-empt the standard
+input for your source code, and allows you to easily mix command line
+and library source code
+(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+
+If no @samp{-f} or @samp{--source} option is specified, then @code{gawk}
+will use the first non-option command line argument as the text of the
+program source code.
+
+@cindex @code{POSIXLY_CORRECT} environment variable
+@cindex environment variable, @code{POSIXLY_CORRECT}
+If the environment variable @code{POSIXLY_CORRECT} exists,
+then @code{gawk} will behave in strict POSIX mode, exactly as if
+you had supplied the @samp{--posix} command line option.
+Many GNU programs look for this environment variable to turn on
+strict POSIX mode. If you supply @samp{--lint} on the command line,
+and @code{gawk} turns on POSIX mode because of @code{POSIXLY_CORRECT},
+then it will print a warning message indicating that POSIX
+mode is in effect.
+
+You would typically set this variable in your shell's startup file.
+For a Bourne compatible shell (such as Bash), you would add these
+lines to the @file{.profile} file in your home directory.
+
+@example
+@group
+POSIXLY_CORRECT=true
+export POSIXLY_CORRECT
+@end group
+@end example
+
+For a @code{csh} compatible shell,@footnote{Not recommended.}
+you would add this line to the @file{.login} file in your home directory.
+
+@example
+setenv POSIXLY_CORRECT true
+@end example
+
+@node Other Arguments, AWKPATH Variable, Options, Invoking Gawk
+@section Other Command Line Arguments
+
+Any additional arguments on the command line are normally treated as
+input files to be processed in the order specified. However, an
+argument that has the form @code{@var{var}=@var{value}} assigns
+the value @var{value} to the variable @var{var}---it does not specify a
+file at all.
+
+@vindex ARGIND
+@vindex ARGV
+All these arguments are made available to your @code{awk} program in the
+@code{ARGV} array (@pxref{Built-in Variables}). Command line options
+and the program text (if present) are omitted from @code{ARGV}.
+All other arguments, including variable assignments, are
+included. As each element of @code{ARGV} is processed, @code{gawk}
+sets the variable @code{ARGIND} to the index in @code{ARGV} of the
+current element.
+
+The distinction between file name arguments and variable-assignment
+arguments is made when @code{awk} is about to open the next input file.
+At that point in execution, it checks the ``file name'' to see whether
+it is really a variable assignment; if so, @code{awk} sets the variable
+instead of reading a file.
+
+Therefore, the variables actually receive the given values after all
+previously specified files have been read. In particular, the values of
+variables assigned in this fashion are @emph{not} available inside a
+@code{BEGIN} rule
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}),
+since such rules are run before @code{awk} begins scanning the argument list.
+
+@cindex dark corner
+The variable values given on the command line are processed for escape
+sequences (d.c.) (@pxref{Escape Sequences}).
+
+In some earlier implementations of @code{awk}, when a variable assignment
+occurred before any file names, the assignment would happen @emph{before}
+the @code{BEGIN} rule was executed. @code{awk}'s behavior was thus
+inconsistent; some command line assignments were available inside the
+@code{BEGIN} rule, while others were not. However,
+some applications came to depend
+upon this ``feature.'' When @code{awk} was changed to be more consistent,
+the @samp{-v} option was added to accommodate applications that depended
+upon the old behavior.
+
+The variable assignment feature is most useful for assigning to variables
+such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
+output formats, before scanning the data files. It is also useful for
+controlling state if multiple passes are needed over a data file. For
+example:
+
+@cindex multiple passes over data
+@cindex passes, multiple
+@example
+awk 'pass == 1 @{ @var{pass 1 stuff} @}
+ pass == 2 @{ @var{pass 2 stuff} @}' pass=1 mydata pass=2 mydata
+@end example
+
+Given the variable assignment feature, the @samp{-F} option for setting
+the value of @code{FS} is not
+strictly necessary. It remains for historical compatibility.
+
+@node AWKPATH Variable, Obsolete, Other Arguments, Invoking Gawk
+@section The @code{AWKPATH} Environment Variable
+@cindex @code{AWKPATH} environment variable
+@cindex environment variable, @code{AWKPATH}
+@cindex search path
+@cindex directory search
+@cindex path, search
+@cindex differences between @code{gawk} and @code{awk}
+
+The previous section described how @code{awk} program files can be named
+on the command line with the @samp{-f} option. In most @code{awk}
+implementations, you must supply a precise path name for each program
+file, unless the file is in the current directory.
+
+@cindex search path, for source files
+But in @code{gawk}, if the file name supplied to the @samp{-f} option
+does not contain a @samp{/}, then @code{gawk} searches a list of
+directories (called the @dfn{search path}), one by one, looking for a
+file with the specified name.
+
+The search path is a string consisting of directory names
+separated by colons. @code{gawk} gets its search path from the
+@code{AWKPATH} environment variable. If that variable does not exist,
+@code{gawk} uses a default path, which is
+@samp{.:/usr/local/share/awk}.@footnote{Your version of @code{gawk}
+may use a directory that is different than @file{/usr/local/share/awk}; it
+will depend upon how @code{gawk} was built and installed. The actual
+directory will be the value of @samp{$(datadir)} generated when
+@code{gawk} was configured. You probably don't need to worry about this
+though.} (Programs written for use by
+system administrators should use an @code{AWKPATH} variable that
+does not include the current directory, @file{.}.)
+
+The search path feature is particularly useful for building up libraries
+of useful @code{awk} functions. The library files can be placed in a
+standard directory that is in the default path, and then specified on
+the command line with a short file name. Otherwise, the full file name
+would have to be typed for each file.
+
+By using both the @samp{--source} and @samp{-f} options, your command line
+@code{awk} programs can use facilities in @code{awk} library files.
+@xref{Library Functions, , A Library of @code{awk} Functions}.
+
+Path searching is not done if @code{gawk} is in compatibility mode.
+This is true for both @samp{--traditional} and @samp{--posix}.
+@xref{Options, ,Command Line Options}.
+
+@strong{Note:} if you want files in the current directory to be found,
+you must include the current directory in the path, either by including
+@file{.} explicitly in the path, or by writing a null entry in the
+path. (A null entry is indicated by starting or ending the path with a
+colon, or by placing two colons next to each other (@samp{::}).) If the
+current directory is not included in the path, then files cannot be
+found in the current directory. This path search mechanism is identical
+to the shell's.
+@c someday, @cite{The Bourne Again Shell}....
+
+Starting with version 3.0, if @code{AWKPATH} is not defined in the
+environment, @code{gawk} will place its default search path into
+@code{ENVIRON["AWKPATH"]}. This makes it easy to determine
+the actual search path @code{gawk} will use.
+
+@node Obsolete, Undocumented, AWKPATH Variable, Invoking Gawk
+@section Obsolete Options and/or Features
+
+@cindex deprecated options
+@cindex obsolete options
+@cindex deprecated features
+@cindex obsolete features
+This section describes features and/or command line options from
+previous releases of @code{gawk} that are either not available in the
+current version, or that are still supported but deprecated (meaning that
+they will @emph{not} be in the next release).
+
+@c update this section for each release!
+
+For version @value{VERSION}.@value{PATCHLEVEL} of @code{gawk}, there are no
+obsolete command line options
+or other deprecated features from the previous version of @code{gawk}.
+@iftex
+This section
+@end iftex
+@ifinfo
+This node
+@end ifinfo
+is thus essentially a place holder,
+in case some option becomes obsolete in a future version of @code{gawk}.
+
+@ignore
+@c This is pretty old news...
+The public-domain version of @code{strftime} that is distributed with
+@code{gawk} changed for the 2.14 release. The @samp{%V} conversion specifier
+that used to generate the date in VMS format was changed to @samp{%v}.
+This is because the POSIX standard for the @code{date} utility now
+specifies a @samp{%V} conversion specifier.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for details.
+@end ignore
+
+@node Undocumented, Known Bugs, Obsolete, Invoking Gawk
+@section Undocumented Options and Features
+@cindex undocumented features
+@display
+@i{Use the Source, Luke!}
+Obi-Wan
+@end display
+@sp 1
+
+This section intentionally left blank.
+
+@c Read The Source, Luke!
+
+@ignore
+@c If these came out in the Info file or TeX document, then they wouldn't
+@c be undocumented, would they?
+
+@code{gawk} has one undocumented option:
+
+@table @code
+@item -W nostalgia
+@itemx --nostalgia
+Print the message @code{"awk: bailing out near line 1"} and dump core.
+This option was inspired by the common behavior of very early versions of
+Unix @code{awk}, and by a t-shirt.
+@end table
+
+Early versions of @code{awk} used to not require any separator (either
+a newline or @samp{;}) between the rules in @code{awk} programs. Thus,
+it was common to see one-line programs like:
+
+@example
+awk '@{ sum += $1 @} END @{ print sum @}'
+@end example
+
+@code{gawk} actually supports this, but it is purposely undocumented
+since it is considered bad style. The correct way to write such a program
+is either
+
+@example
+awk '@{ sum += $1 @} ; END @{ print sum @}'
+@end example
+
+@noindent
+or
+
+@example
+awk '@{ sum += $1 @}
+ END @{ print sum @}' data
+@end example
+
+@noindent
+@xref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a fuller
+explanation.
+
+@end ignore
+
+@node Known Bugs, , Undocumented, Invoking Gawk
+@section Known Bugs in @code{gawk}
+@cindex bugs, known in @code{gawk}
+@cindex known bugs
+
+@itemize @bullet
+@item
+The @samp{-F} option for changing the value of @code{FS}
+(@pxref{Options, ,Command Line Options})
+is not necessary given the command line variable
+assignment feature; it remains only for backwards compatibility.
+
+@item
+If your system actually has support for @file{/dev/fd} and the
+associated @file{/dev/stdin}, @file{/dev/stdout}, and
+@file{/dev/stderr} files, you may get different output from @code{gawk}
+than you would get on a system without those files. When @code{gawk}
+interprets these files internally, it synchronizes output to the
+standard output with output to @file{/dev/stdout}, while on a system
+with those files, the output is actually to different open files
+(@pxref{Special Files, ,Special File Names in @code{gawk}}).
+
+@item
+Syntactically invalid single character programs tend to overflow
+the parse stack, generating a rather unhelpful message. Such programs
+are surprisingly difficult to diagnose in the completely general case,
+and the effort to do so really is not worth it.
+@end itemize
+
+@node Library Functions, Sample Programs, Invoking Gawk, Top
+@chapter A Library of @code{awk} Functions
+
+@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!!
+This chapter presents a library of useful @code{awk} functions. The
+sample programs presented later
+(@pxref{Sample Programs, ,Practical @code{awk} Programs})
+use these functions.
+The functions are presented here in a progression from simple to complex.
+
+@ref{Extract Program, ,Extracting Programs from Texinfo Source Files},
+presents a program that you can use to extract the source code for
+these example library functions and programs from the Texinfo source
+for this @value{DOCUMENT}.
+(This has already been done as part of the @code{gawk} distribution.)
+
+If you have written one or more useful, general purpose @code{awk} functions,
+and would like to contribute them for a subsequent edition of this @value{DOCUMENT},
+please contact the author. @xref{Bugs, ,Reporting Problems and Bugs},
+for information on doing this. Don't just send code, as you will be
+required to either place your code in the public domain,
+publish it under the GPL (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}),
+or assign the copyright in it to the Free Software Foundation.
+
+@menu
+* Portability Notes:: What to do if you don't have @code{gawk}.
+* Nextfile Function:: Two implementations of a @code{nextfile}
+ function.
+* Assert Function:: A function for assertions in @code{awk}
+ programs.
+* Round Function:: A function for rounding if @code{sprintf} does
+ not do it correctly.
+* Ordinal Functions:: Functions for using characters as numbers and
+ vice versa.
+* Join Function:: A function to join an array into a string.
+* Mktime Function:: A function to turn a date into a timestamp.
+* Gettimeofday Function:: A function to get formatted times.
+* Filetrans Function:: A function for handling data file transitions.
+* Getopt Function:: A function for processing command line
+ arguments.
+* Passwd Functions:: Functions for getting user information.
+* Group Functions:: Functions for getting group information.
+* Library Names:: How to best name private global variables in
+ library functions.
+@end menu
+
+@node Portability Notes, Nextfile Function, Library Functions, Library Functions
+@section Simulating @code{gawk}-specific Features
+@cindex portability issues
+
+The programs in this chapter and in
+@ref{Sample Programs, ,Practical @code{awk} Programs},
+freely use features that are specific to @code{gawk}.
+This section briefly discusses how you can rewrite these programs for
+different implementations of @code{awk}.
+
+Diagnostic error messages are sent to @file{/dev/stderr}.
+Use @samp{| "cat 1>&2"} instead of @samp{> "/dev/stderr"}, if your system
+does not have a @file{/dev/stderr}, or if you cannot use @code{gawk}.
+
+A number of programs use @code{nextfile}
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}),
+to skip any remaining input in the input file.
+@ref{Nextfile Function, ,Implementing @code{nextfile} as a Function},
+shows you how to write a function that will do the same thing.
+
+Finally, some of the programs choose to ignore upper-case and lower-case
+distinctions in their input. They do this by assigning one to @code{IGNORECASE}.
+You can achieve the same effect by adding the following rule to the
+beginning of the program:
+
+@example
+# ignore case
+@{ $0 = tolower($0) @}
+@end example
+
+@noindent
+Also, verify that all regexp and string constants used in
+comparisons only use lower-case letters.
+
+@node Nextfile Function, Assert Function, Portability Notes, Library Functions
+@section Implementing @code{nextfile} as a Function
+
+@cindex skipping input files
+@cindex input files, skipping
+The @code{nextfile} statement presented in
+@ref{Nextfile Statement, ,The @code{nextfile} Statement},
+is a @code{gawk}-specific extension. It is not available in other
+implementations of @code{awk}. This section shows two versions of a
+@code{nextfile} function that you can use to simulate @code{gawk}'s
+@code{nextfile} statement if you cannot use @code{gawk}.
+
+Here is a first attempt at writing a @code{nextfile} function.
+
+@example
+@group
+# nextfile --- skip remaining records in current file
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME @{ next @}
+@end group
+@end example
+
+This file should be included before the main program, because it supplies
+a rule that must be executed first. This rule compares the current data
+file's name (which is always in the @code{FILENAME} variable) to a private
+variable named @code{_abandon_}. If the file name matches, then the action
+part of the rule executes a @code{next} statement, to go on to the next
+record. (The use of @samp{_} in the variable name is a convention.
+It is discussed more fully in
+@ref{Library Names, , Naming Library Function Global Variables}.)
+
+The use of the @code{next} statement effectively creates a loop that reads
+all the records from the current data file.
+Eventually, the end of the file is reached, and
+a new data file is opened, changing the value of @code{FILENAME}.
+Once this happens, the comparison of @code{_abandon_} to @code{FILENAME}
+fails, and execution continues with the first rule of the ``real'' program.
+
+The @code{nextfile} function itself simply sets the value of @code{_abandon_}
+and then executes a @code{next} statement to start the loop
+going.@footnote{Some implementations of @code{awk} do not allow you to
+execute @code{next} from within a function body. Some other work-around
+will be necessary if you use such a version.}
+@c mawk is what we're talking about.
+
+This initial version has a subtle problem. What happens if the same data
+file is listed @emph{twice} on the command line, one right after the other,
+or even with just a variable assignment between the two occurrences of
+the file name?
+
+@c @findex nextfile
+@c do it this way, since all the indices are merged
+@cindex @code{nextfile} function
+In such a case,
+this code will skip right through the file a second time, even though
+it should stop when it gets to the end of the first occurrence.
+Here is a second version of @code{nextfile} that remedies this problem.
+
+@example
+@group
+@c file eg/lib/nextfile.awk
+# nextfile --- skip remaining records in current file
+# correctly handle successive occurrences of the same file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME @{
+ if (FNR == 1)
+ _abandon_ = ""
+ else
+ next
+@}
+@c endfile
+@end group
+@end example
+
+The @code{nextfile} function has not changed. It sets @code{_abandon_}
+equal to the current file name and then executes a @code{next} statement.
+The @code{next} statement reads the next record and increments @code{FNR},
+so @code{FNR} is guaranteed to have a value of at least two.
+However, if @code{nextfile} is called for the last record in the file,
+then @code{awk} will close the current data file and move on to the next
+one. Upon doing so, @code{FILENAME} will be set to the name of the new file,
+and @code{FNR} will be reset to one. If this next file is the same as
+the previous one, @code{_abandon_} will still be equal to @code{FILENAME}.
+However, @code{FNR} will be equal to one, telling us that this is a new
+occurrence of the file, and not the one we were reading when the
+@code{nextfile} function was executed. In that case, @code{_abandon_}
+is reset to the empty string, so that further executions of this rule
+will fail (until the next time that @code{nextfile} is called).
+
+If @code{FNR} is not one, then we are still in the original data file,
+and the program executes a @code{next} statement to skip through it.
+
+A natural question to ask at this point is: ``Given that the
+functionality of @code{nextfile} can be provided with a library file,
+why is it built into @code{gawk}?'' This is an important question. Adding
+features for little reason leads to larger, slower programs that are
+harder to maintain.
+
+The answer is that building @code{nextfile} into @code{gawk} provides
+significant gains in efficiency. If the @code{nextfile} function is executed
+at the beginning of a large data file, @code{awk} still has to scan the entire
+file, splitting it up into records, just to skip over it. The built-in
+@code{nextfile} can simply close the file immediately and proceed to the
+next one, saving a lot of time. This is particularly important in
+@code{awk}, since @code{awk} programs are generally I/O bound (i.e.@:
+they spend most of their time doing input and output, instead of performing
+computations).
+
+@node Assert Function, Round Function, Nextfile Function, Library Functions
+@section Assertions
+
+@cindex assertions
+@cindex @code{assert}, C version
+When writing large programs, it is often useful to be able to know
+that a condition or set of conditions is true. Before proceeding with a
+particular computation, you make a statement about what you believe to be
+the case. Such a statement is known as an
+``assertion.'' The C language provides an @code{<assert.h>} header file
+and corresponding @code{assert} macro that the programmer can use to make
+assertions. If an assertion fails, the @code{assert} macro arranges to
+print a diagnostic message describing the condition that should have
+been true but was not, and then it kills the program. In C, using
+@code{assert} looks like this:
+
+@example
+#include <assert.h>
+
+int myfunc(int a, double b)
+@{
+ assert(a <= 5 && b >= 17);
+ @dots{}
+@}
+@end example
+
+If the assertion failed, the program would print a message similar to
+this:
+
+@example
+prog.c:5: assertion failed: a <= 5 && b >= 17
+@end example
+
+@findex assert
+The ANSI C language makes it possible to turn the condition into a string for use
+in printing the diagnostic message. This is not possible in @code{awk}, so
+this @code{assert} function also requires a string version of the condition
+that is being tested.
+
+@example
+@c @group
+@c file eg/lib/assert.awk
+# assert --- assert that a condition is true. Otherwise exit.
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+function assert(condition, string)
+@{
+ if (! condition) @{
+ printf("%s:%d: assertion failed: %s\n",
+ FILENAME, FNR, string) > "/dev/stderr"
+ _assert_exit = 1
+ exit 1
+ @}
+@}
+
+END @{
+ if (_assert_exit)
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{assert} function tests the @code{condition} parameter. If it
+is false, it prints a message to standard error, using the @code{string}
+parameter to describe the failed condition. It then sets the variable
+@code{_assert_exit} to one, and executes the @code{exit} statement.
+The @code{exit} statement jumps to the @code{END} rule. If the @code{END}
+rule finds @code{_assert_exit} to be true, then it exits immediately.
+
+The purpose of the @code{END} rule with its test is to
+keep any other @code{END} rules from running. When an assertion fails, the
+program should exit immediately.
+If no assertions fail, then @code{_assert_exit} will still be
+false when the @code{END} rule is run normally, and the rest of the
+program's @code{END} rules will execute.
+For all of this to work correctly, @file{assert.awk} must be the
+first source file read by @code{awk}.
+
+You would use this function in your programs this way:
+
+@example
+function myfunc(a, b)
+@{
+ assert(a <= 5 && b >= 17, "a <= 5 && b >= 17")
+ @dots{}
+@}
+@end example
+
+@noindent
+If the assertion failed, you would see a message like this:
+
+@example
+mydata:1357: assertion failed: a <= 5 && b >= 17
+@end example
+
+There is a problem with this version of @code{assert} that it may not
+be possible to work around. An @code{END} rule is automatically added
+to the program calling @code{assert}. Normally, if a program consists
+of just a @code{BEGIN} rule, the input files and/or standard input are
+not read. However, now that the program has an @code{END} rule, @code{awk}
+will attempt to read the input data files, or standard input
+(@pxref{Using BEGIN/END, , Startup and Cleanup Actions}),
+most likely causing the program to hang, waiting for input.
+
+@node Round Function, Ordinal Functions, Assert Function, Library Functions
+@section Rounding Numbers
+
+@cindex rounding
+The way @code{printf} and @code{sprintf}
+(@pxref{Printf, , Using @code{printf} Statements for Fancier Printing})
+do rounding will often depend
+upon the system's C @code{sprintf} subroutine.
+On many machines,
+@code{sprintf} rounding is ``unbiased,'' which means it doesn't always
+round a trailing @samp{.5} up, contrary to naive expectations. In unbiased
+rounding, @samp{.5} rounds to even, rather than always up, so 1.5 rounds to
+2 but 4.5 rounds to 4.
+The result is that if you are using a format that does
+rounding (e.g., @code{"%.0f"}) you should check what your system does.
+The following function does traditional rounding;
+it might be useful if your @code{awk}'s @code{printf} does unbiased rounding.
+
+@findex round
+@example
+@c file eg/lib/round.awk
+# round --- do normal rounding
+#
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, August, 1996
+# Public Domain
+
+function round(x, ival, aval, fraction)
+@{
+ ival = int(x) # integer part, int() truncates
+
+ # see if fractional part
+ if (ival == x) # no fraction
+ return x
+
+ if (x < 0) @{
+ aval = -x # absolute value
+ ival = int(aval)
+ fraction = aval - ival
+ if (fraction >= .5)
+ return int(x) - 1 # -2.5 --> -3
+ else
+ return int(x) # -2.3 --> -2
+ @} else @{
+ fraction = x - ival
+ if (fraction >= .5)
+ return ival + 1
+ else
+ return ival
+ @}
+@}
+
+# test harness
+@{ print $0, round($0) @}
+@c endfile
+@end example
+
+@node Ordinal Functions, Join Function, Round Function, Library Functions
+@section Translating Between Characters and Numbers
+
+@cindex numeric character values
+@cindex values of characters as numbers
+One commercial implementation of @code{awk} supplies a built-in function,
+@code{ord}, which takes a character and returns the numeric value for that
+character in the machine's character set. If the string passed to
+@code{ord} has more than one character, only the first one is used.
+
+The inverse of this function is @code{chr} (from the function of the same
+name in Pascal), which takes a number and returns the corresponding character.
+
+Both functions can be written very nicely in @code{awk}; there is no real
+reason to build them into the @code{awk} interpreter.
+
+@findex ord
+@findex chr
+@example
+@group
+@c file eg/lib/ord.awk
+# ord.awk --- do ord and chr
+#
+# Global identifiers:
+# _ord_: numerical values indexed by characters
+# _ord_init: function to initialize _ord_
+#
+# Arnold Robbins
+# arnold@@gnu.ai.mit.edu
+# Public Domain
+# 16 January, 1992
+# 20 July, 1992, revised
+
+# fill in the _ord_ table once, at startup
+BEGIN @{ _ord_init() @}
+@c endfile
+@end group
+
+@c @group
+@c file eg/lib/ord.awk
+# _ord_init fills the global array _ord_ so that _ord_[character] holds
+# the numeric value of that character.  low, high, i and t are locals.
+function _ord_init( low, high, i, t)
+@{
+ # probe the character set by checking which value produces BEL
+ low = sprintf("%c", 7) # BEL is ascii 7
+ if (low == "\a") @{ # regular ascii
+ low = 0
+ high = 127
+ @} else if (sprintf("%c", 128 + 7) == "\a") @{
+ # ascii, mark parity
+ low = 128
+ high = 255
+ @} else @{ # ebcdic(!)
+ low = 0
+ high = 255
+ @}
+
+ # record the value of every character in the chosen range
+ for (i = low; i <= high; i++) @{
+ t = sprintf("%c", i)
+ _ord_[t] = i
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+@cindex character sets
+@cindex character encodings
+@cindex ASCII
+@cindex EBCDIC
+@cindex mark parity
+Some explanation of the numbers used by @code{_ord_init} is worthwhile.
+The most prominent character set in use today is ASCII. Although an
+eight-bit byte can hold 256 distinct values (from zero to 255), ASCII only
+defines characters that use the values from zero to 127.@footnote{ASCII
+has been extended in many countries to use the values from 128 to 255
+for country-specific characters. If your system uses these extensions,
+you can simplify @code{_ord_init} to simply loop from zero to 255.}
+At least one computer manufacturer that we know of
+@c Pr1me, blech
+uses ASCII, but with mark parity, meaning that the leftmost bit in the byte
+is always one. What this means is that on those systems, characters
+have numeric values from 128 to 255.
+Finally, large mainframe systems use the EBCDIC character set, which
+uses all 256 values.
+While there are other character sets in use on some older systems,
+they are not really worth worrying about.
+
+@example
+@group
+@c file eg/lib/ord.awk
+# ord(str) returns the _ord_ table entry for the first character of str.
+# c is a local variable.
+function ord(str, c)
+@{
+ # only first character is of interest
+ c = substr(str, 1, 1)
+ return _ord_[c]
+@}
+@c endfile
+@end group
+
+@group
+@c file eg/lib/ord.awk
+# chr(c) returns the character whose numeric value is c
+function chr(c)
+@{
+ # force c to be numeric by adding 0
+ return sprintf("%c", c + 0)
+@}
+@c endfile
+@end group
+
+@c @group
+@c file eg/lib/ord.awk
+#### test code ####
+# BEGIN \
+# @{
+# for (;;) @{
+# printf("enter a character: ")
+# if (getline var <= 0)
+# break
+# printf("ord(%s) = %d\n", var, ord(var))
+# @}
+# @}
+@c endfile
+@c @end group
+@end example
+
+An obvious improvement to these functions would be to move the code for the
+@code{@w{_ord_init}} function into the body of the @code{BEGIN} rule. It was
+written this way initially for ease of development.
+
+There is a ``test program'' in a @code{BEGIN} rule, for testing the
+function. It is commented out for production use.
+
+@node Join Function, Mktime Function, Ordinal Functions, Library Functions
+@section Merging an Array Into a String
+
+@cindex merging strings
+When doing string processing, it is often useful to be able to join
+all the strings in an array into one long string. The following function,
+@code{join}, accomplishes this task. It is used later in several of
+the application programs
+(@pxref{Sample Programs, ,Practical @code{awk} Programs}).
+
+Good function design is important; this function needs to be general, but it
+should also have a reasonable default behavior. It is called with an array
+and the beginning and ending indices of the elements in the array to be
+merged. This assumes that the array indices are numeric---a reasonable
+assumption since the array was likely created with @code{split}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@findex join
+@example
+@group
+@c file eg/lib/join.awk
+# join.awk --- join an array into a string
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# join(array, start, end, sep) returns array[start] through array[end]
+# concatenated in index order, with sep between adjacent elements.
+# A null or omitted sep means a single blank; sep equal to SUBSEP means
+# no separator at all.  result and i are local variables.
+function join(array, start, end, sep, result, i)
+@{
+ if (sep == "")
+ sep = " "
+ else if (sep == SUBSEP) # magic value
+ sep = ""
+ # start with the first element so no separator precedes it
+ result = array[start]
+ for (i = start + 1; i <= end; i++)
+ result = result sep array[i]
+ return result
+@}
+@c endfile
+@end group
+@end example
+
+An optional additional argument is the separator to use when joining the
+strings back together. If the caller supplies a non-empty value,
+@code{join} uses it. If it is not supplied, it will have a null
+value. In this case, @code{join} uses a single blank as a default
+separator for the strings. If the value is equal to @code{SUBSEP},
+then @code{join} joins the strings with no separator between them.
+@code{SUBSEP} serves as a ``magic'' value to indicate that there should
+be no separation between the component strings.
+
+It would be nice if @code{awk} had an assignment operator for concatenation.
+The lack of an explicit operator for concatenation makes string operations
+more difficult than they really need to be.
+
+@node Mktime Function, Gettimeofday Function, Join Function, Library Functions
+@section Turning Dates Into Timestamps
+
+The @code{systime} function built in to @code{gawk}
+returns the current time of day as
+a timestamp in ``seconds since the Epoch.'' This timestamp
+can be converted into a printable date of almost infinitely variable
+format using the built-in @code{strftime} function.
+(For more information on @code{systime} and @code{strftime},
+@pxref{Time Functions, ,Functions for Dealing with Time Stamps}.)
+
+@cindex converting dates to timestamps
+@cindex dates, converting to timestamps
+@cindex timestamps, converting from dates
+An interesting but difficult problem is to convert a readable representation
+of a date back into a timestamp. The ANSI C library provides a @code{mktime}
+function that does the basic job, converting a canonical representation of a
+date into a timestamp.
+
+It would appear at first glance that @code{gawk} would have to supply a
+@code{mktime} built-in function that was simply a ``hook'' to the C language
+version. In fact though, @code{mktime} can be implemented entirely in
+@code{awk}.
+
+Here is a version of @code{mktime} for @code{awk}. It takes a simple
+representation of the date and time, and converts it into a timestamp.
+
+The code is presented here intermixed with explanatory prose. In
+@ref{Extract Program, ,Extracting Programs from Texinfo Source Files},
+you will see how the Texinfo source file for this @value{DOCUMENT}
+can be processed to extract the code into a single source file.
+
+The program begins with a descriptive comment and a @code{BEGIN} rule
+that initializes a table @code{_tm_months}. This table is a two-dimensional
+array that has the lengths of the months. The first index is zero for
+regular years, and one for leap years. The values are the same for all the
+months in both kinds of years, except for February; thus the use of multiple
+assignment.
+
+@example
+@c @group
+@c file eg/lib/mktime.awk
+# mktime.awk --- convert a canonical date representation
+# into a timestamp
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# _tm_months[leap, month] is the number of days in the month;
+# leap is 0 for a regular year and 1 for a leap year, month is 1 to 12.
+BEGIN \
+@{
+ # Initialize table of month lengths
+ _tm_months[0,1] = _tm_months[1,1] = 31
+ # February is the only month that differs between the two kinds of year
+ _tm_months[0,2] = 28; _tm_months[1,2] = 29
+ _tm_months[0,3] = _tm_months[1,3] = 31
+ _tm_months[0,4] = _tm_months[1,4] = 30
+ _tm_months[0,5] = _tm_months[1,5] = 31
+ _tm_months[0,6] = _tm_months[1,6] = 30
+ _tm_months[0,7] = _tm_months[1,7] = 31
+ _tm_months[0,8] = _tm_months[1,8] = 31
+ _tm_months[0,9] = _tm_months[1,9] = 30
+ _tm_months[0,10] = _tm_months[1,10] = 31
+ _tm_months[0,11] = _tm_months[1,11] = 30
+ _tm_months[0,12] = _tm_months[1,12] = 31
+@}
+@c endfile
+@c @end group
+@end example
+
+The benefit of merging multiple @code{BEGIN} rules
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns})
+is particularly clear when writing library files. Functions in library
+files can cleanly initialize their own private data and also provide clean-up
+actions in private @code{END} rules.
+
+The next function is a simple one that computes whether a given year is or
+is not a leap year. If a year is evenly divisible by four, but not evenly
+divisible by 100, or if it is evenly divisible by 400, then it is a leap
+year. Thus, 1904 was a leap year, 1900 was not, but 2000 will be.
+@c Change this after the year 2000 to ``2000 was'' (:-)
+
+@findex _tm_isleap
+@example
+@group
+@c file eg/lib/mktime.awk
+# decide if a year is a leap year
+# returns 1 if year is a leap year and 0 if it is not; ret is a local
+function _tm_isleap(year, ret)
+@{
+ # divisible by 4 but not by 100, or divisible by 400
+ ret = (year % 4 == 0 && year % 100 != 0) ||
+ (year % 400 == 0)
+
+ return ret
+@}
+@c endfile
+@end group
+@end example
+
+This function is only used a few times in this file, and its computation
+could have been written @dfn{in-line} (at the point where it's used).
+Making it a separate function made the original development easier, and also
+avoids the possibility of typing errors when duplicating the code in
+multiple places.
+
+The next function is more interesting. It does most of the work of
+generating a timestamp, which is converting a date and time into some number
+of seconds since the Epoch. The caller passes an array (rather
+imaginatively named @code{a}) containing six
+values: the year including century, the month as a number between one and 12,
+the day of the month, the hour as a number between zero and 23, the minute in
+the hour, and the seconds within the minute.
+
+The function uses several local variables to precompute the number of
+seconds in an hour, seconds in a day, and seconds in a year. Often,
+similar C code simply writes out the expression in-line, expecting the
+compiler to do @dfn{constant folding}. E.g., most C compilers would
+turn @samp{60 * 60} into @samp{3600} at compile time, instead of recomputing
+it every time at run time. Precomputing these values makes the
+function more efficient.
+
+@findex _tm_addup
+@example
+@c @group
+@c file eg/lib/mktime.awk
+# convert a date into seconds
+# a[1] to a[6] hold year, month, day, hour, minute and second.
+# Returns the number of seconds from the start of 1970 to that date and
+# time, with no time zone adjustment.  The remaining names are locals.
+function _tm_addup(a, total, yearsecs, daysecs,
+ hoursecs, i, j)
+@{
+ hoursecs = 60 * 60
+ daysecs = 24 * hoursecs
+ yearsecs = 365 * daysecs
+
+ # first approximation: every earlier year counted as 365 days
+ total = (a[1] - 1970) * yearsecs
+
+@group
+ # extra day for leap years
+ for (i = 1970; i < a[1]; i++)
+ if (_tm_isleap(i))
+ total += daysecs
+@end group
+
+@group
+ # whole months of the given year that precede the given month;
+ # j picks the regular or leap year row of _tm_months
+ j = _tm_isleap(a[1])
+ for (i = 1; i < a[2]; i++)
+ total += _tm_months[j, i] * daysecs
+@end group
+
+ # whole days before the given day, then the time within the day
+ total += (a[3] - 1) * daysecs
+ total += a[4] * hoursecs
+ total += a[5] * 60
+ total += a[6]
+
+ return total
+@}
+@c endfile
+@c @end group
+@end example
+
+The function starts with a first approximation of all the seconds between
+Midnight, January 1, 1970,@footnote{This is the Epoch on POSIX systems.
+It may be different on other systems.} and the beginning of the current
+year. It then goes through all those years, and for every leap year,
+adds an additional day's worth of seconds.
+
+The variable @code{j} holds one if the current year is a leap year,
+and zero if it is not.
+For every month in the current year prior to the current month, it adds
+the number of seconds in the month, using the appropriate entry in the
+@code{_tm_months} array.
+
+Finally, it adds in the seconds for the number of days prior to the current
+day, and the number of hours, minutes, and seconds in the current day.
+
+The result is a count of seconds since January 1, 1970. This value is not
+yet what is needed though. The reason why is described shortly.
+
+The main @code{mktime} function takes a single character string argument.
+This string is a representation of a date and time in a ``canonical''
+(fixed) form. This string should be
+@code{"@var{year} @var{month} @var{day} @var{hour} @var{minute} @var{second}"}.
+
+@findex mktime
+@example
+@c @group
+@c file eg/lib/mktime.awk
+# mktime --- convert a date into seconds,
+# compensate for time zone
+
+# str is "year month day hour minute second".  Returns the matching
+# timestamp, or -1 if str does not have six fields or a field is out of
+# range.  res1, res2, a, b, i, j, t and diff are local variables.
+function mktime(str, res1, res2, a, b, i, j, t, diff)
+@{
+ i = split(str, a, " ") # don't rely on FS
+
+ if (i != 6)
+ return -1
+
+ # force numeric
+ for (j in a)
+ a[j] += 0
+
+@group
+ # validate
+ # (the day is not checked against the length of the given month,
+ # and 60 is accepted for the seconds field)
+ if (a[1] < 1970 ||
+ a[2] < 1 || a[2] > 12 ||
+ a[3] < 1 || a[3] > 31 ||
+ a[4] < 0 || a[4] > 23 ||
+ a[5] < 0 || a[5] > 59 ||
+ a[6] < 0 || a[6] > 60 )
+ return -1
+@end group
+
+ # res1 is the timestamp the fields would have if they were UTC
+ res1 = _tm_addup(a)
+ # strftime formats res1 as local time, in the same field layout
+ t = strftime("%Y %m %d %H %M %S", res1)
+
+ if (_tm_debug)
+ printf("(%s) -> (%s)\n", str, t) > "/dev/stderr"
+
+ # adding up the local rendering gives a second value; it differs
+ # from res1 by the offset between local time and UTC
+ split(t, b, " ")
+ res2 = _tm_addup(b)
+
+ diff = res1 - res2
+
+ if (_tm_debug)
+ printf("diff = %d seconds\n", diff) > "/dev/stderr"
+
+ # apply the offset so the result stands for the fields as local time
+ res1 += diff
+
+ return res1
+@}
+@c endfile
+@c @end group
+@end example
+
+The function first splits the string into an array, using spaces and tabs as
+separators. If there are not six elements in the array, it returns an
+error, signaled as the value @minus{}1.
+Next, it forces each element of the array to be numeric, by adding zero to it.
+The following @samp{if} statement then makes sure that each element is
+within an allowable range. (This checking could be extended further, e.g.,
+to make sure that the day of the month is within the correct range for the
+particular month supplied.) All of this is essentially preliminary set-up
+and error checking.
+
+Recall that @code{_tm_addup} generated a value in seconds since Midnight,
+January 1, 1970. This value is not directly usable as the result we want,
+@emph{since the calculation does not account for the local timezone}. In other
+words, the value represents the count in seconds since the Epoch, but only
+for UTC (Coordinated Universal Time). If the local timezone is east or west
+of UTC, then some number of hours should be either added to, or subtracted from
+the resulting timestamp.
+
+For example, 6:23 p.m. in Atlanta, Georgia (USA), is normally five hours west
+of (behind) UTC. It is only four hours behind UTC if daylight savings
+time is in effect.
+If you are calling @code{mktime} in Atlanta, with the argument
+@code{@w{"1993 5 23 18 23 12"}}, the result from @code{_tm_addup} will be
+for 6:23 p.m. UTC, which is only 2:23 p.m. in Atlanta. It is necessary to
+add another four hours worth of seconds to the result.
+
+How can @code{mktime} determine how far away it is from UTC? This is
+surprisingly easy. The returned timestamp represents the time passed to
+@code{mktime} @emph{as UTC}. This timestamp can be fed back to
+@code{strftime}, which will format it as a @emph{local} time; i.e.@: as
+if it already had the UTC difference added in to it. This is done by
+giving @code{@w{"%Y %m %d %H %M %S"}} to @code{strftime} as the format
+argument. It returns the computed timestamp in the original string
+format. The result represents a time that accounts for the UTC
+difference. When the new time is converted back to a timestamp, the
+difference between the two timestamps is the difference (in seconds)
+between the local timezone and UTC. This difference is then added back
+to the original result. An example demonstrating this is presented below.
+
+Finally, there is a ``main'' program for testing the function.
+
+@example
+@c @group
+@c file eg/lib/mktime.awk
+# self-test; runs only when _tm_test is set, e.g. with -v _tm_test=1
+BEGIN @{
+ if (_tm_test) @{
+ printf "Enter date as yyyy mm dd hh mm ss: "
+ getline _tm_test_date
+
+ # convert to a timestamp and format it again; the date printed
+ # should be the same as the one that was entered
+ t = mktime(_tm_test_date)
+ r = strftime("%Y %m %d %H %M %S", t)
+ printf "Got back (%s)\n", r
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+The entire program uses two variables that can be set on the command
+line to control debugging output and to enable the test in the final
+@code{BEGIN} rule. Here is the result of a test run. (Note that debugging
+output is to standard error, and test output is to standard output.)
+
+@example
+@c @group
+$ gawk -f mktime.awk -v _tm_test=1 -v _tm_debug=1
+@print{} Enter date as yyyy mm dd hh mm ss: 1993 5 23 15 35 10
+@error{} (1993 5 23 15 35 10) -> (1993 05 23 11 35 10)
+@error{} diff = 14400 seconds
+@print{} Got back (1993 05 23 15 35 10)
+@c @end group
+@end example
+
+The time entered was 3:35 p.m. (15:35 on a 24-hour clock), on May 23, 1993.
+The first line
+of debugging output shows the resulting time as UTC---four hours ahead of
+the local time zone. The second line shows that the difference is 14400
+seconds, which is four hours. (The difference is only four hours, since
+daylight savings time is in effect during May.)
+The final line of test output shows that the timezone compensation
+algorithm works; the returned time is the same as the entered time.
+
+This program does not solve the general problem of turning an arbitrary date
+representation into a timestamp. That problem is very involved. However,
+the @code{mktime} function provides a foundation upon which to build. Other
+software can convert month names into numeric months, and AM/PM times into
+24-hour clocks, to generate the ``canonical'' format that @code{mktime}
+requires.
+
+@node Gettimeofday Function, Filetrans Function, Mktime Function, Library Functions
+@section Managing the Time of Day
+
+@cindex formatted timestamps
+@cindex timestamps, formatted
+The @code{systime} and @code{strftime} functions described in
+@ref{Time Functions, ,Functions for Dealing with Time Stamps},
+provide the minimum functionality necessary for dealing with the time of day
+in human readable form. While @code{strftime} is extensive, the control
+formats are not necessarily easy to remember or intuitively obvious when
+reading a program.
+
+The following function, @code{gettimeofday}, populates a user-supplied array
+with pre-formatted time information. It returns a string with the current
+time formatted in the same way as the @code{date} utility.
+
+@findex gettimeofday
+@example
+@c @group
+@c file eg/lib/gettime.awk
+# gettimeofday --- get the time of day in a usable format
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain, May 1993
+#
+# Returns a string in the format of output of date(1)
+# Populates the array argument time with individual values:
+# time["second"] -- seconds (0 - 59)
+# time["minute"] -- minutes (0 - 59)
+# time["hour"] -- hours (0 - 23)
+# time["althour"] -- hours (1 - 12)
+# time["monthday"] -- day of month (1 - 31)
+# time["month"] -- month of year (1 - 12)
+# time["monthname"] -- name of the month
+# time["shortmonth"] -- short name of the month
+# time["year"] -- year within century (0 - 99)
+# time["fullyear"] -- year with century (19xx or 20xx)
+# time["weekday"] -- day of week (Sunday = 0)
+# time["altweekday"] -- day of week (Monday = 0)
+# time["weeknum"] -- week number, Sunday first day
+# time["altweeknum"] -- week number, Monday first day
+# time["dayname"] -- name of weekday
+# time["shortdayname"] -- short name of weekday
+# time["yearday"] -- day of year (1 - 366)
+# time["timezone"] -- abbreviation of timezone name
+# time["ampm"] -- AM or PM designation
+#
+# Any elements already in time are deleted before it is filled in.
+# ret, now and i are local variables.
+
+@group
+function gettimeofday(time, ret, now, i)
+@{
+ # get time once, avoids unnecessary system calls
+ now = systime()
+
+ # return date(1)-style output
+ ret = strftime("%a %b %d %H:%M:%S %Z %Y", now)
+
+ # clear out target array
+ for (i in time)
+ delete time[i]
+@end group
+
+@group
+ # fill in values, force numeric values to be
+ # numeric by adding 0
+ time["second"] = strftime("%S", now) + 0
+ time["minute"] = strftime("%M", now) + 0
+ time["hour"] = strftime("%H", now) + 0
+ time["althour"] = strftime("%I", now) + 0
+ time["monthday"] = strftime("%d", now) + 0
+ time["month"] = strftime("%m", now) + 0
+ time["monthname"] = strftime("%B", now)
+ time["shortmonth"] = strftime("%b", now)
+ time["year"] = strftime("%y", now) + 0
+ time["fullyear"] = strftime("%Y", now) + 0
+ time["weekday"] = strftime("%w", now) + 0
+ # derived from weekday so that Monday really is 0 and Sunday is 6;
+ # the %u conversion counts Monday as 1 and is an extension to the
+ # ANSI C (C89) strftime
+ time["altweekday"] = (time["weekday"] + 6) % 7
+ time["dayname"] = strftime("%A", now)
+ time["shortdayname"] = strftime("%a", now)
+ time["yearday"] = strftime("%j", now) + 0
+ time["timezone"] = strftime("%Z", now)
+ time["ampm"] = strftime("%p", now)
+ time["weeknum"] = strftime("%U", now) + 0
+ time["altweeknum"] = strftime("%W", now) + 0
+
+ return ret
+@}
+@end group
+@c endfile
+@end example
+
+The string indices are easier to use and read than the various formats
+required by @code{strftime}. The @code{alarm} program presented in
+@ref{Alarm Program, ,An Alarm Clock Program},
+uses this function.
+
+@c exercise!!!
+The @code{gettimeofday} function is presented above as it was written. A
+more general design for this function would have allowed the user to supply
+an optional timestamp value that would have been used instead of the current
+time.
+
+@node Filetrans Function, Getopt Function, Gettimeofday Function, Library Functions
+@section Noting Data File Boundaries
+
+@cindex per file initialization and clean-up
+The @code{BEGIN} and @code{END} rules are each executed exactly once, at
+the beginning and end respectively of your @code{awk} program
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+We (the @code{gawk} authors) once had a user who mistakenly thought that the
+@code{BEGIN} rule was executed at the beginning of each data file and the
+@code{END} rule was executed at the end of each data file. When informed
+that this was not the case, the user requested that we add new special
+patterns to @code{gawk}, named @code{BEGIN_FILE} and @code{END_FILE}, that
+would have the desired behavior. He even supplied us the code to do so.
+
+However, after a little thought, I came up with the following library program.
+It arranges to call two user-supplied functions, @code{beginfile} and
+@code{endfile}, at the beginning and end of each data file.
+Besides solving the problem in only nine(!) lines of code, it does so
+@emph{portably}; this will work with any implementation of @code{awk}.
+
+@example
+@c @group
+# transfile.awk
+#
+# Give the user a hook for filename transitions
+#
+# The user must supply functions beginfile() and endfile()
+# that each take the name of the file being started or
+# finished, respectively.
+#
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, January 1992
+# Public Domain
+
+# Fires on the first record read from a file whose name differs from
+# the one remembered in _oldfilename.
+FILENAME != _oldfilename \
+@{
+ # there is no previous file to finish before the first one
+ if (_oldfilename != "")
+ endfile(_oldfilename)
+ _oldfilename = FILENAME
+ beginfile(FILENAME)
+@}
+
+# finish the last file
+END @{ endfile(FILENAME) @}
+@c @end group
+@end example
+
+This file must be loaded before the user's ``main'' program, so that the
+rule it supplies will be executed first.
+
+This rule relies on @code{awk}'s @code{FILENAME} variable that
+automatically changes for each new data file. The current file name is
+saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does
+not equal @code{_oldfilename}, then a new data file is being processed, and
+it is necessary to call @code{endfile} for the old file. Since
+@code{endfile} should only be called if a file has been processed, the
+program first checks to make sure that @code{_oldfilename} is not the null
+string. The program then assigns the current file name to
+@code{_oldfilename}, and calls @code{beginfile} for the file.
+Since, like all @code{awk} variables, @code{_oldfilename} will be
+initialized to the null string, this rule executes correctly even for the
+first data file.
+
+The program also supplies an @code{END} rule, to do the final processing for
+the last file. Since this @code{END} rule comes before any @code{END} rules
+supplied in the ``main'' program, @code{endfile} will be called first. Once
+again the value of multiple @code{BEGIN} and @code{END} rules should be clear.
+
+@findex beginfile
+@findex endfile
+This version has the same problem as the first version of @code{nextfile}
+(@pxref{Nextfile Function, ,Implementing @code{nextfile} as a Function}).
+If the same data file occurs twice in a row on the command line, then
+@code{endfile} and @code{beginfile} will not be executed at the end of the
+first pass and at the beginning of the second pass.
+The following version solves the problem.
+
+@example
+@c @group
+@c file eg/lib/ftrans.awk
+# ftrans.awk --- handle data file transitions
+#
+# user supplies beginfile() and endfile() functions
+#
+# Arnold Robbins, arnold@@gnu.ai.mit.edu. November 1992
+# Public Domain
+
+# FNR is 1 on the first record of every data file, so this rule also
+# fires when the same file is named twice in a row.
+FNR == 1 @{
+ # there is no previous file to finish before the first one
+ if (_filename_ != "")
+ endfile(_filename_)
+ _filename_ = FILENAME
+ beginfile(FILENAME)
+@}
+
+# finish the last file, using the name saved by the rule above
+END @{ endfile(_filename_) @}
+@c endfile
+@c @end group
+@end example
+
+In @ref{Wc Program, ,Counting Things},
+you will see how this library function can be used, and
+how it simplifies writing the main program.
+
+@node Getopt Function, Passwd Functions, Filetrans Function, Library Functions
+@section Processing Command Line Options
+
+@cindex @code{getopt}, C version
+@cindex processing arguments
+@cindex argument processing
+Most utilities on POSIX compatible systems take options or ``switches'' on
+the command line that can be used to change the way a program behaves.
+@code{awk} is an example of such a program
+(@pxref{Options, ,Command Line Options}).
+Often, options take @dfn{arguments}, data that the program needs to
+correctly obey the command line option. For example, @code{awk}'s
+@samp{-F} option requires a string to use as the field separator.
+The first occurrence on the command line of either @samp{--} or a
+string that does not begin with @samp{-} ends the options.
+
+Most Unix systems provide a C function named @code{getopt} for processing
+command line arguments. The programmer provides a string describing the one
+letter options. If an option requires an argument, it is followed in the
+string with a colon. @code{getopt} is also passed the
+count and values of the command line arguments, and is called in a loop.
+@code{getopt} processes the command line arguments for option letters.
+Each time around the loop, it returns a single character representing the
+next option letter that it found, or @samp{?} if it found an invalid option.
+When it returns @minus{}1, there are no options left on the command line.
+
+When using @code{getopt}, options that do not take arguments can be
+grouped together. Furthermore, options that take arguments require that the
+argument be present. The argument can immediately follow the option letter,
+or it can be a separate command line argument.
+
+Given a hypothetical program that takes
+three command line options, @samp{-a}, @samp{-b}, and @samp{-c}, and
+@samp{-b} requires an argument, all of the following are valid ways of
+invoking the program:
+
+@example
+@c @group
+prog -a -b foo -c data1 data2 data3
+prog -ac -bfoo -- data1 data2 data3
+prog -acbfoo data1 data2 data3
+@c @end group
+@end example
+
+Notice that when the argument is grouped with its option, the rest of
+the command line argument is considered to be the option's argument.
+In the above example, @samp{-acbfoo} indicates that all of the
+@samp{-a}, @samp{-b}, and @samp{-c} options were supplied,
+and that @samp{foo} is the argument to the @samp{-b} option.
+
+@code{getopt} provides four external variables that the programmer can use.
+
+@table @code
+@item optind
+The index in the argument value array (@code{argv}) where the first
+non-option command line argument can be found.
+
+@item optarg
+The string value of the argument to an option.
+
+@item opterr
+Usually @code{getopt} prints an error message when it finds an invalid
+option. Setting @code{opterr} to zero disables this feature. (An
+application might wish to print its own error message.)
+
+@item optopt
+The letter representing the command line option.
+While not usually documented, most versions supply this variable.
+@end table
+
+The following C fragment shows how @code{getopt} might process command line
+arguments for @code{awk}.
+
+@example
+@group
+int
+main(int argc, char *argv[])
+@{
+ @dots{}
+ /* print our own message */
+ opterr = 0;
+@end group
+@group
+ /*
+  * Each letter in the option string is followed by a colon, so all
+  * four options take an argument.  getopt returns -1 once no
+  * options are left.
+  */
+ while ((c = getopt(argc, argv, "v:f:F:W:")) != -1) @{
+ switch (c) @{
+ case 'f': /* file */
+ @dots{}
+ break;
+ case 'F': /* field separator */
+ @dots{}
+ break;
+ case 'v': /* variable assignment */
+ @dots{}
+ break;
+ case 'W': /* extension */
+ @dots{}
+ break;
+ case '?': /* getopt found an invalid option */
+ default:
+ usage();
+ break;
+ @}
+ @}
+ @dots{}
+@}
+@end group
+@end example
+
+As a side point, @code{gawk} actually uses the GNU @code{getopt_long}
+function to process both normal and GNU-style long options
+(@pxref{Options, ,Command Line Options}).
+
+The abstraction provided by @code{getopt} is very useful, and would be quite
+handy in @code{awk} programs as well. Here is an @code{awk} version of
+@code{getopt}. This function highlights one of the greatest weaknesses in
+@code{awk}, which is that it is very poor at manipulating single characters.
+Repeated calls to @code{substr} are necessary for accessing individual
+characters (@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+The discussion walks through the code a bit at a time.
+
+@example
+@c @group
+@c file eg/lib/getopt.awk
+# getopt --- do C library getopt(3) function in awk
+#
+# arnold@@gnu.ai.mit.edu
+# Public domain
+#
+# Initial version: March, 1991
+# Revised: May, 1993
+
+@group
+# External variables:
+# Optind -- index of ARGV for first non-option argument
+# Optarg -- string value of argument to current option
+# Opterr -- if non-zero, print our own diagnostic
+# Optopt -- current option letter
+@end group
+
+# Returns
+# -1 at end of options
+# ? for unrecognized option
+# <c> a character representing the current option
+
+# Private Data
+# _opti index in multi-flag option, e.g., -abc
+@c endfile
+@c @end group
+@end example
+
+The function starts out with some documentation: who wrote the code,
+and when it was revised, followed by a list of the global variables it uses,
+what the return values are and what they mean, and any global variables that
+are ``private'' to this library function. Such documentation is essential
+for any program, and particularly for library functions.
+
+@findex getopt
+@example
+@c @group
+@c file eg/lib/getopt.awk
+function getopt(argc, argv, options, optl, thisopt, i)
+@{
+ optl = length(options)
+ if (optl == 0) # no options given
+ return -1
+
+ if (argv[Optind] == "--") @{ # all done
+ Optind++
+ _opti = 0
+ return -1
+ @} else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) @{
+ _opti = 0
+ return -1
+ @}
+@c endfile
+@c @end group
+@end example
+
+The function first checks that it was indeed called with a string of options
+(the @code{options} parameter). If @code{options} has a zero length,
+@code{getopt} immediately returns @minus{}1.
+
+The next thing to check for is the end of the options. A @samp{--} ends the
+command line options, as does any command line argument that does not begin
+with a @samp{-}. @code{Optind} is used to step through the array of command
+line arguments; it retains its value across calls to @code{getopt}, since it
+is a global variable.
+
+The regexp used, @code{@w{/^-[^: \t\n\f\r\v\b]/}}, is
+perhaps a bit of overkill; it checks for a @samp{-} followed by anything
+that is not whitespace and not a colon.
+If the current command line argument does not match this pattern,
+it is not an option, and it ends option processing.
+
+@example
+@group
+@c file eg/lib/getopt.awk
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) @{
+ if (Opterr)
+ printf("%c -- invalid option\n",
+ thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return "?"
+ @}
+@c endfile
+@end group
+@end example
+
+The @code{_opti} variable tracks the position in the current command line
+argument (@code{argv[Optind]}). In the case that multiple options were
+grouped together with one @samp{-} (e.g., @samp{-abx}), it is necessary
+to return them to the user one at a time.
+
+If @code{_opti} is equal to zero, it is set to two, the index in the string
+of the next character to look at (we skip the @samp{-}, which is at position
+one). The variable @code{thisopt} holds the character, obtained with
+@code{substr}. It is saved in @code{Optopt} for the main program to use.
+
+If @code{thisopt} is not in the @code{options} string, then it is an
+invalid option. If @code{Opterr} is non-zero, @code{getopt} prints an error
+message on the standard error that is similar to the message from the C
+version of @code{getopt}.
+
+Since the option is invalid, it is necessary to skip it and move on to the
+next option character. If @code{_opti} is greater than or equal to the
+length of the current command line argument, then it is necessary to move on
+to the next one, so @code{Optind} is incremented and @code{_opti} is reset
+to zero. Otherwise, @code{Optind} is left alone and @code{_opti} is merely
+incremented.
+
+In any case, since the option was invalid, @code{getopt} returns @samp{?}.
+The main program can examine @code{Optopt} if it needs to know what the
+invalid option letter actually was.
+
+@example
+@group
+@c file eg/lib/getopt.awk
+ if (substr(options, i + 1, 1) == ":") @{
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ @} else
+ Optarg = ""
+@c endfile
+@end group
+@end example
+
+If the option requires an argument, the option letter is followed by a colon
+in the @code{options} string. If there are remaining characters in the
+current command line argument (@code{argv[Optind]}), then the rest of that
+string is assigned to @code{Optarg}. Otherwise, the next command line
+argument is used (@samp{-xFOO} vs. @samp{@w{-x FOO}}). In either case,
+@code{_opti} is reset to zero, since there are no more characters left to
+examine in the current command line argument.
+
+@example
+@c @group
+@c file eg/lib/getopt.awk
+ if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return thisopt
+@}
+@c endfile
+@c @end group
+@end example
+
+Finally, if @code{_opti} is either zero or greater than or equal to the
+length of the current command line argument, it means this element in
+@code{argv} is
+through being processed, so @code{Optind} is incremented to point to the
+next element in @code{argv}. If neither condition is true, then only
+@code{_opti} is incremented, so that the next option letter can be processed
+on the next call to @code{getopt}.
+
+@example
+@c @group
+@c file eg/lib/getopt.awk
+BEGIN @{
+ Opterr = 1 # default is to diagnose
+ Optind = 1 # skip ARGV[0]
+
+ # test program
+ if (_getopt_test) @{
+ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
+ printf("c = <%c>, optarg = <%s>\n",
+ _go_c, Optarg)
+ printf("non-option arguments:\n")
+ for (; Optind < ARGC; Optind++)
+ printf("\tARGV[%d] = <%s>\n",
+ Optind, ARGV[Optind])
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
+@code{Opterr} is set to one, since the default behavior is for @code{getopt}
+to print a diagnostic message upon seeing an invalid option. @code{Optind}
+is set to one, since there's no reason to look at the program name, which is
+in @code{ARGV[0]}.
+
+The rest of the @code{BEGIN} rule is a simple test program. Here are the
+results of two sample runs of the test program.
+
+@example
+@group
+$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x
+@print{} c = <a>, optarg = <>
+@print{} c = <c>, optarg = <>
+@print{} c = <b>, optarg = <ARG>
+@print{} non-option arguments:
+@print{} ARGV[3] = <bax>
+@print{} ARGV[4] = <-x>
+@end group
+
+@group
+$ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc
+@print{} c = <a>, optarg = <>
+@error{} x -- invalid option
+@print{} c = <?>, optarg = <>
+@print{} non-option arguments:
+@print{} ARGV[4] = <xyz>
+@print{} ARGV[5] = <abc>
+@end group
+@end example
+
+The first @samp{--} terminates the arguments to @code{awk}, so that it does
+not try to interpret the @samp{-a} etc. as its own options.
+
+Several of the sample programs presented in
+@ref{Sample Programs, ,Practical @code{awk} Programs},
+use @code{getopt} to process their arguments.
+
+@node Passwd Functions, Group Functions, Getopt Function, Library Functions
+@section Reading the User Database
+
+@cindex @file{/dev/user}
+The @file{/dev/user} special file
+(@pxref{Special Files, ,Special File Names in @code{gawk}})
+provides access to the current user's real and effective user and group id
+numbers, and if available, the user's supplementary group set.
+However, since these are numbers, they do not provide very useful
+information to the average user. There needs to be some way to find the
+user information associated with the user and group numbers. This
+section presents a suite of functions for retrieving information from the
+user database. @xref{Group Functions, ,Reading the Group Database},
+for a similar suite that retrieves information from the group database.
+
+@cindex @code{getpwent}, C version
+@cindex user information
+@cindex login information
+@cindex account information
+@cindex password file
+The POSIX standard does not define the file where user information is
+kept. Instead, it provides the @code{<pwd.h>} header file
+and several C language subroutines for obtaining user information.
+The primary function is @code{getpwent}, for ``get password entry.''
+The ``password'' comes from the original user database file,
+@file{/etc/passwd}, which kept user information, along with the
+encrypted passwords (hence the name).
+
+While an @code{awk} program could simply read @file{/etc/passwd} directly
+(the format is well known), because of the way password
+files are handled on networked systems,
+this file may not contain complete information about the system's set of users.
+
+@cindex @code{pwcat} program
+To be sure of being
+able to produce a readable, complete version of the user database, it is
+necessary to write a small C program that calls @code{getpwent}.
+@code{getpwent} is defined to return a pointer to a @code{struct passwd}.
+Each time it is called, it returns the next entry in the database.
+When there are no more entries, it returns @code{NULL}, the null pointer.
+When this happens, the C program should call @code{endpwent} to close the
+database.
+Here is @code{pwcat}, a C program that ``cats'' the password database.
+
+@findex pwcat.c
+@example
+@c @group
+@c file eg/lib/pwcat.c
+/*
+ * pwcat.c
+ *
+ * Generate a printable version of the password database
+ *
+ * Arnold Robbins
+ * arnold@@gnu.ai.mit.edu
+ * May 1993
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <pwd.h>
+
+int
+main(argc, argv)
+int argc;
+char **argv;
+@{
+    struct passwd *p;
+
+    /* getpwent returns NULL once every entry has been read */
+    while ((p = getpwent()) != NULL)
+        /* uid_t and gid_t vary in width; print them as long */
+        printf("%s:%s:%ld:%ld:%s:%s:%s\n",
+            p->pw_name, p->pw_passwd, (long) p->pw_uid,
+            (long) p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell);
+
+    endpwent();    /* close the user database */
+    return 0;      /* no <stdlib.h> here, so do not call exit */
+@}
+@c endfile
+@c @end group
+@end example
+
+If you don't understand C, don't worry about it.
+The output from @code{pwcat} is the user database, in the traditional
+@file{/etc/passwd} format of colon-separated fields. The fields are:
+
+@table @asis
+@item Login name
+The user's login name.
+
+@item Encrypted password
+The user's encrypted password. This may not be available on some systems.
+
+@item User-ID
+The user's numeric user-id number.
+
+@item Group-ID
+The user's numeric group-id number.
+
+@item Full name
+The user's full name, and perhaps other information associated with the
+user.
+
+@item Home directory
+The user's login, or ``home'' directory (familiar to shell programmers as
+@code{$HOME}).
+
+@item Login shell
+The program that will be run when the user logs in. This is usually a
+shell, such as Bash (the GNU Bourne-Again shell).
+@end table
+
+Here are a few lines representative of @code{pwcat}'s output.
+
+@example
+@c @group
+$ pwcat
+@print{} root:3Ov02d5VaUPB6:0:1:Operator:/:/bin/sh
+@print{} nobody:*:65534:65534::/:
+@print{} daemon:*:1:1::/:
+@print{} sys:*:2:2::/:/bin/csh
+@print{} bin:*:3:3::/bin:
+@print{} arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh
+@print{} miriam:yxaay:112:10:Miriam Robbins:/home/miriam:/bin/sh
+@print{} andy:abcca2:113:10:Andy Jacobs:/home/andy:/bin/sh
+@dots{}
+@c @end group
+@end example
+
+With that introduction, here is a group of functions for getting user
+information. There are several functions here, corresponding to the C
+functions of the same name.
+
+@findex _pw_init
+@example
+@c file eg/lib/passwdawk.in
+@group
+# passwd.awk --- access password file information
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN @{
+ # tailor this to suit your system
+ _pw_awklib = "/usr/local/libexec/awk/"
+@}
+@end group
+
+@group
+function _pw_init( oldfs, oldrs, olddol0, pwcat)
+@{
+ if (_pw_inited)
+ return
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+ pwcat = _pw_awklib "pwcat"
+ while ((pwcat | getline) > 0) @{
+ _pw_byname[$1] = $0
+ _pw_byuid[$3] = $0
+ _pw_bycount[++_pw_total] = $0
+ @}
+ close(pwcat)
+ _pw_count = 0
+ _pw_inited = 1
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+@}
+@c endfile
+@end group
+@end example
+
+The @code{BEGIN} rule sets a private variable to the directory where
+@code{pwcat} is stored. Since it is used to help out an @code{awk} library
+routine, we have chosen to put it in @file{/usr/local/libexec/awk}.
+You might want it to be in a different directory on your system.
+
+The function @code{_pw_init} keeps three copies of the user information
+in three associative arrays. The arrays are indexed by user name
+(@code{_pw_byname}), by user-id number (@code{_pw_byuid}), and by order of
+occurrence (@code{_pw_bycount}).
+
+The variable @code{_pw_inited} is used for efficiency; @code{_pw_init} only
+needs to be called once.
+
+Since this function uses @code{getline} to read information from
+@code{pwcat}, it first saves the values of @code{FS}, @code{RS}, and
+@code{$0}. Doing so is necessary, since these functions could be called
+from anywhere within a user's program, and the user may have his or her
+own values for @code{FS} and @code{RS}.
+@ignore
+Problem, what if FIELDWIDTHS is in use? Sigh.
+@end ignore
+
+The main part of the function uses a loop to read database lines, split
+the line into fields, and then store the line into each array as necessary.
+When the loop is done, @code{@w{_pw_init}} cleans up by closing the pipeline,
+setting @code{@w{_pw_inited}} to one, and restoring @code{FS}, @code{RS}, and
+@code{$0}. The use of @code{@w{_pw_count}} will be explained below.
+
+@findex getpwnam
+@example
+@group
+@c file eg/lib/passwdawk.in
+function getpwnam(name)
+@{
+ _pw_init()
+ if (name in _pw_byname)
+ return _pw_byname[name]
+ return ""
+@}
+@c endfile
+@end group
+@end example
+
+The @code{getpwnam} function takes a user name as a string argument. If that
+user is in the database, it returns the appropriate line. Otherwise it
+returns the null string.
+
+@findex getpwuid
+@example
+@group
+@c file eg/lib/passwdawk.in
+function getpwuid(uid)
+@{
+ _pw_init()
+ if (uid in _pw_byuid)
+ return _pw_byuid[uid]
+ return ""
+@}
+@c endfile
+@end group
+@end example
+
+Similarly,
+the @code{getpwuid} function takes a user-id number argument. If that
+user number is in the database, it returns the appropriate line. Otherwise it
+returns the null string.
+
+@findex getpwent
+@example
+@c @group
+@c file eg/lib/passwdawk.in
+function getpwent()
+@{
+ _pw_init()
+ if (_pw_count < _pw_total)
+ return _pw_bycount[++_pw_count]
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getpwent} function simply steps through the database, one entry at
+a time. It uses @code{_pw_count} to track its current position in the
+@code{_pw_bycount} array.
+
+@findex endpwent
+@example
+@c @group
+@c file eg/lib/passwdawk.in
+function endpwent()
+@{
+ _pw_count = 0
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{@w{endpwent}} function resets @code{@w{_pw_count}} to zero, so that
+subsequent calls to @code{getpwent} will start over again.
+
+A conscious design decision in this suite is that each subroutine calls
+@code{@w{_pw_init}} to initialize the database arrays. The overhead of running
+a separate process to generate the user database, and the I/O to scan it,
+will only be incurred if the user's main program actually calls one of these
+functions. If this library file is loaded along with a user's program, but
+none of the routines are ever called, then there is no extra run-time overhead.
+(The alternative would be to move the body of @code{@w{_pw_init}} into a
+@code{BEGIN} rule, which would always run @code{pwcat}. This simplifies the
+code but runs an extra process that may never be needed.)
+
+In turn, calling @code{_pw_init} is not too expensive, since the
+@code{_pw_inited} variable keeps the program from reading the data more than
+once. If you are worried about squeezing every last cycle out of your
+@code{awk} program, the check of @code{_pw_inited} could be moved out of
+@code{_pw_init} and duplicated in all the other functions. In practice,
+this is not necessary, since most @code{awk} programs are I/O bound, and it
+would clutter up the code.
+
+The @code{id} program in @ref{Id Program, ,Printing Out User Information},
+uses these functions.
+
+@node Group Functions, Library Names, Passwd Functions, Library Functions
+@section Reading the Group Database
+
+@cindex @code{getgrent}, C version
+@cindex group information
+@cindex account information
+@cindex group file
+Much of the discussion presented in
+@ref{Passwd Functions, ,Reading the User Database},
+applies to the group database as well. Although there has traditionally
+been a well known file, @file{/etc/group}, in a well known format, the POSIX
+standard only provides a set of C library routines
+(@code{<grp.h>} and @code{getgrent})
+for accessing the information.
+Even though this file may exist, it likely does not have
+complete information. Therefore, as with the user database, it is necessary
+to have a small C program that generates the group database as its output.
+
+@cindex @code{grcat} program
+Here is @code{grcat}, a C program that ``cats'' the group database.
+
+@findex grcat.c
+@example
+@c @group
+@c file eg/lib/grcat.c
+/*
+ * grcat.c
+ *
+ * Generate a printable version of the group database
+ *
+ * Arnold Robbins, arnold@@gnu.ai.mit.edu
+ * May 1993
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <grp.h>
+
+@group
+int
+main(argc, argv)
+int argc;
+char **argv;
+@{
+    struct group *g;
+    int i;
+@end group
+
+    /* getgrent returns NULL once every entry has been read */
+    while ((g = getgrent()) != NULL) @{
+        /* gid_t varies in width; print it as a long */
+        printf("%s:%s:%ld:", g->gr_name, g->gr_passwd,
+            (long) g->gr_gid);
+        /* gr_mem is a NULL-terminated list of member names */
+        for (i = 0; g->gr_mem[i] != NULL; i++) @{
+            printf("%s", g->gr_mem[i]);
+            if (g->gr_mem[i+1] != NULL)    /* comma between, not after */
+                putchar(',');
+        @}
+        putchar('\n');
+    @}
+    endgrent();    /* close the group database */
+    return 0;      /* no <stdlib.h> here, so do not call exit */
+@}
+@c endfile
+@c @end group
+@end example
+
+Each line in the group database represents one group. The fields are
+separated with colons, and represent the following information.
+
+@table @asis
+@item Group Name
+The name of the group.
+
+@item Group Password
+The encrypted group password. In practice, this field is never used. It is
+usually empty, or set to @samp{*}.
+
+@item Group ID Number
+The numeric group-id number. This number should be unique within the file.
+
+@item Group Member List
+A comma-separated list of user names. These users are members of the group.
+Most Unix systems allow users to be members of several groups
+simultaneously. If your system does, then reading @file{/dev/user} will
+return those group-id numbers in @code{$5} through @code{$NF}.
+(Note that @file{/dev/user} is a @code{gawk} extension;
+@pxref{Special Files, ,Special File Names in @code{gawk}}.)
+@end table
+
+Here is what running @code{grcat} might produce:
+
+@example
+@group
+$ grcat
+@print{} wheel:*:0:arnold
+@print{} nogroup:*:65534:
+@print{} daemon:*:1:
+@print{} kmem:*:2:
+@print{} staff:*:10:arnold,miriam,andy
+@print{} other:*:20:
+@dots{}
+@end group
+@end example
+
+Here are the functions for obtaining information from the group database.
+There are several, modeled after the C library functions of the same names.
+
+@findex _gr_init
+@example
+@group
+@c file eg/lib/groupawk.in
+# group.awk --- functions for dealing with the group file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN \
+@{
+ # Change to suit your system
+ _gr_awklib = "/usr/local/libexec/awk/"
+@}
+@c endfile
+@end group
+
+@group
+@c file eg/lib/groupawk.in
+function _gr_init( oldfs, oldrs, olddol0, grcat, n, a, i)
+@{
+ if (_gr_inited)
+ return
+@end group
+
+@group
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+@end group
+
+@group
+ grcat = _gr_awklib "grcat"
+ while ((grcat | getline) > 0) @{
+ if ($1 in _gr_byname)
+ _gr_byname[$1] = _gr_byname[$1] "," $4
+ else
+ _gr_byname[$1] = $0
+ if ($3 in _gr_bygid)
+ _gr_bygid[$3] = _gr_bygid[$3] "," $4
+ else
+ _gr_bygid[$3] = $0
+
+ n = split($4, a, "[ \t]*,[ \t]*")
+@end group
+@group
+ for (i = 1; i <= n; i++)
+ if (a[i] in _gr_groupsbyuser)
+ _gr_groupsbyuser[a[i]] = \
+ _gr_groupsbyuser[a[i]] " " $1
+ else
+ _gr_groupsbyuser[a[i]] = $1
+@end group
+
+@group
+ _gr_bycount[++_gr_count] = $0
+ @}
+@end group
+@group
+ close(grcat)
+ _gr_count = 0
+ _gr_inited++
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+@}
+@c endfile
+@end group
+@end example
+
+The @code{BEGIN} rule sets a private variable to the directory where
+@code{grcat} is stored. Since it is used to help out an @code{awk} library
+routine, we have chosen to put it in @file{/usr/local/libexec/awk}. You might
+want it to be in a different directory on your system.
+
+These routines follow the same general outline as the user database routines
+(@pxref{Passwd Functions, ,Reading the User Database}).
+The @code{@w{_gr_inited}} variable is used to
+ensure that the database is scanned no more than once.
+The @code{@w{_gr_init}} function first saves @code{FS}, @code{RS}, and
+@code{$0}, and then sets @code{FS} and @code{RS} to the correct values for
+scanning the group information.
+
+The group information is stored in several associative arrays.
+The arrays are indexed by group name (@code{@w{_gr_byname}}), by group-id number
+(@code{@w{_gr_bygid}}), and by position in the database (@code{@w{_gr_bycount}}).
+There is an additional array indexed by user name (@code{@w{_gr_groupsbyuser}}),
+that is a space separated list of groups that each user belongs to.
+
+Unlike the user database, it is possible to have multiple records in the
+database for the same group. This is common when a group has a large number
+of members. Such a pair of entries might look like:
+
+@example
+tvpeople:*:101:johny,jay,arsenio
+tvpeople:*:101:david,conan,tom,joan
+@end example
+
+For this reason, @code{_gr_init} looks to see if a group name or
+group-id number has already been seen. If it has, then the user names are
+simply concatenated onto the previous list of users. (There is actually a
+subtle problem with the code presented above. Suppose that
+the first time there were no names. This code adds the names with
+a leading comma. It also doesn't check that there is a @code{$4}.)
+
+Finally, @code{_gr_init} closes the pipeline to @code{grcat}, restores
+@code{FS}, @code{RS}, and @code{$0}, initializes @code{_gr_count} to zero
+(it is used later), and makes @code{_gr_inited} non-zero.
+
+@findex getgrnam
+@example
+@c @group
+@c file eg/lib/groupawk.in
+function getgrnam(group)
+@{
+ _gr_init()
+ if (group in _gr_byname)
+ return _gr_byname[group]
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getgrnam} function takes a group name as its argument, and if that
+group exists, it is returned. Otherwise, @code{getgrnam} returns the null
+string.
+
+@findex getgrgid
+@example
+@c @group
+@c file eg/lib/groupawk.in
+function getgrgid(gid)
+@{
+ _gr_init()
+ if (gid in _gr_bygid)
+ return _gr_bygid[gid]
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getgrgid} function is similar; it takes a numeric group-id, and
+looks up the information associated with that group-id.
+
+@findex getgruser
+@example
+@group
+@c file eg/lib/groupawk.in
+function getgruser(user)
+@{
+ _gr_init()
+ if (user in _gr_groupsbyuser)
+ return _gr_groupsbyuser[user]
+ return ""
+@}
+@c endfile
+@end group
+@end example
+
+The @code{getgruser} function does not have a C counterpart. It takes a
+user name, and returns the list of groups that have the user as a member.
+
+@findex getgrent
+@example
+@c @group
+@c file eg/lib/groupawk.in
+# getgrent --- return the next group database entry, or "" at the end
+function getgrent()
+@{
+    _gr_init()
+    # advance the private cursor; it must be _gr_count, the same
+    # variable that endgrent resets and that indexes _gr_bycount
+    if (++_gr_count in _gr_bycount)
+        return _gr_bycount[_gr_count]
+    return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getgrent} function steps through the database one entry at a time.
+It uses @code{_gr_count} to track its position in the list.
+
+@findex endgrent
+@example
+@group
+@c file eg/lib/groupawk.in
+function endgrent()
+@{
+ _gr_count = 0
+@}
+@c endfile
+@end group
+@end example
+
+@code{endgrent} resets @code{_gr_count} to zero so that @code{getgrent} can
+start over again.
+
+As with the user database routines, each function calls @code{_gr_init} to
+initialize the arrays. Doing so only incurs the extra overhead of running
+@code{grcat} if these functions are used (as opposed to moving the body of
+@code{_gr_init} into a @code{BEGIN} rule).
+
+Most of the work is in scanning the database and building the various
+associative arrays. The functions that the user calls are themselves very
+simple, relying on @code{awk}'s associative arrays to do the work.
+
+The @code{id} program in @ref{Id Program, ,Printing Out User Information},
+uses these functions.
+
+@node Library Names, , Group Functions, Library Functions
+@section Naming Library Function Global Variables
+
+@cindex namespace issues in @code{awk}
+@cindex documenting @code{awk} programs
+@cindex programs, documenting
+Due to the way the @code{awk} language evolved, variables are either
+@dfn{global} (usable by the entire program), or @dfn{local} (usable just by
+a specific function). There is no intermediate state analogous to
+@code{static} variables in C.
+
+Library functions often need to have global variables that they can use to
+preserve state information between calls to the function. For example,
+@code{getopt}'s variable @code{_opti}
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the @code{_tm_months} array used by @code{mktime}
+(@pxref{Mktime Function, ,Turning Dates Into Timestamps}).
+Such variables are called @dfn{private}, since the only functions that need to
+use them are the ones in the library.
+
+When writing a library function, you should try to choose names for your
+private variables so that they will not conflict with any variables used by
+either another library function or a user's main program. For example, a
+name like @samp{i} or @samp{j} is not a good choice, since user programs
+often use variable names like these for their own purposes.
+
+The example programs shown in this chapter all start the names of their
+private variables with an underscore (@samp{_}). Users generally don't use
+leading underscores in their variable names, so this convention immediately
+decreases the chances that the variable name will be accidentally shared
+with the user's program.
+
+In addition, several of the library functions use a prefix that helps
+indicate what function or set of functions uses the variables. For example,
+@code{_tm_months} in @code{mktime}
+(@pxref{Mktime Function, ,Turning Dates Into Timestamps}), and
+@code{_pw_byname} in the user data base routines
+(@pxref{Passwd Functions, ,Reading the User Database}).
+This convention is recommended, since it even further decreases the chance
+of inadvertent conflict among variable names.
+Note that this convention can be used equally well for both variable names
+and private function names.
+
+While I could have re-written all the library routines to use this
+convention, I did not do so, in order to show how my own @code{awk}
+programming style has evolved, and to provide some basis for this
+discussion.
+
+As a final note on variable naming, if a function makes global variables
+available for use by a main program, it is a good convention to start that
+variable's name with a capital letter.
+For example, @code{getopt}'s @code{Opterr} and @code{Optind} variables
+(@pxref{Getopt Function, ,Processing Command Line Options}).
+The leading capital letter indicates that it is global, while the fact that
+the variable name is not all capital letters indicates that the variable is
+not one of @code{awk}'s built-in variables, like @code{FS}.
+
+It is also important that @emph{all} variables in library functions
+that do not need to save state are in fact declared local. If this is
+not done, the variable could accidentally be used in the user's program,
+leading to bugs that are very difficult to track down.
+
+@example
+function lib_func(x, y, l1, l2)
+@{
+ @dots{}
+ @var{use variable} some_var # some_var could be local
+ @dots{} # but is not by oversight
+@}
+@end example
+
+@cindex Tcl
+A different convention, common in the Tcl community, is to use a single
+associative array to hold the values needed by the library function(s), or
+``package.'' This significantly decreases the number of actual global names
+in use. For example, the functions described in
+@ref{Passwd Functions, , Reading the User Database},
+might have used @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}},
+@code{@w{PW_data["count"]}} and @code{@w{PW_data["awklib"]}}, instead of
+@code{@w{_pw_inited}}, @code{@w{_pw_total}}, @code{@w{_pw_count}},
+and @code{@w{_pw_awklib}}.
+
+The conventions presented in this section are exactly that: conventions. You
+are not required to write your programs this way; we merely recommend that
+you do so.
+
+@node Sample Programs, Language History, Library Functions, Top
+@chapter Practical @code{awk} Programs
+
+This chapter presents a potpourri of @code{awk} programs for your reading
+enjoyment.
+@iftex
+There are two sections. The first presents @code{awk}
+versions of several common POSIX utilities.
+The second is a grab-bag of interesting programs.
+@end iftex
+
+Many of these programs use the library functions presented in
+@ref{Library Functions, ,A Library of @code{awk} Functions}.
+
+@menu
+* Clones:: Clones of common utilities.
+* Miscellaneous Programs:: Some interesting @code{awk} programs.
+@end menu
+
+@node Clones, Miscellaneous Programs, Sample Programs, Sample Programs
+@section Re-inventing Wheels for Fun and Profit
+
+This section presents a number of POSIX utilities that are implemented in
+@code{awk}. Re-inventing these programs in @code{awk} is often enjoyable,
+since the algorithms can be very clearly expressed, and usually the code is
+very concise and simple. This is true because @code{awk} does so much for you.
+
+It should be noted that these programs are not necessarily intended to
+replace the installed versions on your system. Instead, their
+purpose is to illustrate @code{awk} language programming for ``real world''
+tasks.
+
+The programs are presented in alphabetical order.
+
+@menu
+* Cut Program:: The @code{cut} utility.
+* Egrep Program:: The @code{egrep} utility.
+* Id Program:: The @code{id} utility.
+* Split Program:: The @code{split} utility.
+* Tee Program:: The @code{tee} utility.
+* Uniq Program:: The @code{uniq} utility.
+* Wc Program:: The @code{wc} utility.
+@end menu
+
+@node Cut Program, Egrep Program, Clones, Clones
+@subsection Cutting Out Fields and Columns
+
+@cindex @code{cut} utility
+The @code{cut} utility selects, or ``cuts,'' either characters or fields
+from its standard
+input and sends them to its standard output. @code{cut} can cut out either
+a list of characters, or a list of fields. By default, fields are separated
+by tabs, but you may supply a command line option to change the field
+@dfn{delimiter}, i.e.@: the field separator character. @code{cut}'s definition
+of fields is less general than @code{awk}'s.
+
+A common use of @code{cut} might be to pull out just the login name of
+logged-on users from the output of @code{who}. For example, the following
+pipeline generates a sorted, unique list of the logged-on users:
+
+@example
+who | cut -c1-8 | sort | uniq
+@end example
+
+The options for @code{cut} are:
+
+@table @code
+@item -c @var{list}
+Use @var{list} as the list of characters to cut out. Items within the list
+may be separated by commas, and ranges of characters can be separated with
+dashes. The list @samp{1-8,15,22-35} specifies characters one through
+eight, 15, and 22 through 35.
+
+@item -f @var{list}
+Use @var{list} as the list of fields to cut out.
+
+@item -d @var{delim}
+Use @var{delim} as the field separator character instead of the tab
+character.
+
+@item -s
+Suppress printing of lines that do not contain the field delimiter.
+@end table
+
+The @code{awk} implementation of @code{cut} uses the @code{getopt} library
+function (@pxref{Getopt Function, ,Processing Command Line Options}),
+and the @code{join} library function
+(@pxref{Join Function, ,Merging an Array Into a String}).
+
+The program begins with a comment describing the options and a @code{usage}
+function which prints out a usage message and exits. @code{usage} is called
+if invalid arguments are supplied.
+
+@findex cut.awk
+@example
+@c @group
+@c file eg/prog/cut.awk
+# cut.awk --- implement cut in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -f list Cut fields
+# -d c Field delimiter character
+# -c list Cut characters
+#
+# -s Suppress lines without the delimiter character
+
+function usage( e1, e2)
+@{
+ e1 = "usage: cut [-f list] [-d c] [-s] [files...]"
+ e2 = "usage: cut [-c list] [files...]"
+ print e1 > "/dev/stderr"
+ print e2 > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+@noindent
+The variables @code{e1} and @code{e2} are used so that the function
+fits nicely on the
+@iftex
+page.
+@end iftex
+@ifinfo
+screen.
+@end ifinfo
+
+Next comes a @code{BEGIN} rule that parses the command line options.
+It sets @code{FS} to a single tab character, since that is @code{cut}'s
+default field separator. The output field separator is also set to be the
+same as the input field separator. Then @code{getopt} is used to step
+through the command line options. One or the other of the variables
+@code{by_fields} or @code{by_chars} is set to true, to indicate that
+processing should be done by fields or by characters respectively.
+When cutting by characters, the output field separator is set to the null
+string.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+BEGIN \
+@{
+ FS = "\t" # default
+ OFS = FS
+ while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{
+ if (c == "f") @{
+ by_fields = 1
+ fieldlist = Optarg
+@group
+ @} else if (c == "c") @{
+ by_chars = 1
+ fieldlist = Optarg
+ OFS = ""
+ @} else if (c == "d") @{
+ if (length(Optarg) > 1) @{
+ printf("Using first character of %s" \
+ " for delimiter\n", Optarg) > "/dev/stderr"
+ Optarg = substr(Optarg, 1, 1)
+ @}
+ FS = Optarg
+ OFS = FS
+ if (FS == " ") # defeat awk semantics
+ FS = "[ ]"
+ @} else if (c == "s")
+ suppress++
+ else
+ usage()
+ @}
+@end group
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+@c endfile
+@c @end group
+@end example
+
+Special care is taken when the field delimiter is a space. Using
+@code{@w{" "}} (a single space) for the value of @code{FS} is
+incorrect---@code{awk} would
+separate fields with runs of spaces, tabs and/or newlines, and we want them to be
+separated with individual spaces. Also, note that after @code{getopt} is
+through, we have to clear out all the elements of @code{ARGV} from one up
+to, but not including, @code{Optind}, so that @code{awk} will not try to
+process the command line
+options as file names.
+
+After dealing with the command line options, the program verifies that the
+options make sense. Only one or the other of @samp{-c} and @samp{-f} should
+be used, and both require a field list. Then either @code{set_fieldlist} or
+@code{set_charlist} is called to pull apart the list of fields or
+characters.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+ if (by_fields && by_chars)
+ usage()
+
+ if (by_fields == 0 && by_chars == 0)
+ by_fields = 1 # default
+
+ if (fieldlist == "") @{
+ print "cut: needs list for -c or -f" > "/dev/stderr"
+ exit 1
+ @}
+
+@group
+ if (by_fields)
+ set_fieldlist()
+ else
+ set_charlist()
+@}
+@c endfile
+@end group
+@end example
+
+Here is @code{set_fieldlist}. It first splits the field list apart
+at the commas, into an array. Then, for each element of the array, it
+looks to see if it is actually a range, and if so splits it apart. The range
+is verified to make sure the first number is smaller than the second.
+Each number in the list is added to the @code{flist} array, which simply
+lists the fields that will be printed.
+Normal field splitting is used; that is, the program lets @code{awk}
+handle the job of doing the field splitting.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+function set_fieldlist( n, m, i, j, k, f, g)
+@{
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) @{
+ if (index(f[i], "-") != 0) @{ # a range
+ m = split(f[i], g, "-")
+ if (m != 2 || g[1] >= g[2]) @{
+ printf("bad field list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ @}
+ for (k = g[1]; k <= g[2]; k++)
+ flist[j++] = k
+ @} else
+ flist[j++] = f[i]
+ @}
+ nfields = j - 1
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{set_charlist} function is more complicated than @code{set_fieldlist}.
+The idea here is to use @code{gawk}'s @code{FIELDWIDTHS} variable
+(@pxref{Constant Size, ,Reading Fixed-width Data}),
+which describes constant width input. When using a character list, that is
+exactly what we have.
+
+Setting up @code{FIELDWIDTHS} is more complicated than simply listing the
+fields that need to be printed. We have to keep track of the fields to be
+printed, and also the intervening characters that have to be skipped.
+For example, suppose you wanted characters one through eight, 15, and
+22 through 35. You would use @samp{-c 1-8,15,22-35}. The necessary value
+for @code{FIELDWIDTHS} would be @code{@w{"8 6 1 6 14"}}. This gives us five
+fields, and what should be printed are @code{$1}, @code{$3}, and @code{$5}.
+The intermediate fields are ``filler,'' stuff in between the desired data.
+
+@code{flist} lists the fields to be printed, and @code{t} tracks the
+complete field list, including filler fields.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+function set_charlist( field, i, j, f, g, t,
+ filler, last, len)
+@{
+ field = 1 # count total fields
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) @{
+ if (index(f[i], "-") != 0) @{ # range
+ m = split(f[i], g, "-")
+ if (m != 2 || g[1] >= g[2]) @{
+ printf("bad character list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ @}
+ len = g[2] - g[1] + 1
+ if (g[1] > 1) # compute length of filler
+ filler = g[1] - last - 1
+ else
+ filler = 0
+ if (filler)
+ t[field++] = filler
+ t[field++] = len # length of field
+ last = g[2]
+ flist[j++] = field - 1
+ @} else @{
+ if (f[i] > 1)
+ filler = f[i] - last - 1
+ else
+ filler = 0
+ if (filler)
+ t[field++] = filler
+ t[field++] = 1
+ last = f[i]
+ flist[j++] = field - 1
+ @}
+ @}
+@group
+ FIELDWIDTHS = join(t, 1, field - 1)
+ nfields = j - 1
+@}
+@end group
+@c endfile
+@end example
+
+Here is the rule that actually processes the data. If the @samp{-s} option
+was given, then @code{suppress} will be true. The first @code{if} statement
+makes sure that the input record does have the field separator. If
+@code{cut} is processing fields, @code{suppress} is true, and the field
+separator character is not in the record, then the record is skipped.
+
+If the record is valid, then at this point, @code{gawk} has split the data
+into fields, either using the character in @code{FS} or using fixed-length
+fields and @code{FIELDWIDTHS}. The loop goes through the list of fields
+that should be printed. If the corresponding field has data in it, it is
+printed. If the next field also has data, then the separator character is
+written out in between the fields.
+
+@c 2e: Could use `index($0, FS) != 0' instead of `$0 !~ FS', below
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+@{
+ if (by_fields && suppress && $0 !~ FS)
+ next
+
+ for (i = 1; i <= nfields; i++) @{
+ if ($flist[i] != "") @{
+ printf "%s", $flist[i]
+ if (i < nfields && $flist[i+1] != "")
+ printf "%s", OFS
+ @}
+ @}
+ print ""
+@}
+@c endfile
+@c @end group
+@end example
+
+This version of @code{cut} relies on @code{gawk}'s @code{FIELDWIDTHS}
+variable to do the character-based cutting. While it would be possible in
+other @code{awk} implementations to use @code{substr}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+it would also be extremely painful to do so.
+The @code{FIELDWIDTHS} variable supplies an elegant solution to the problem
+of picking the input line apart by characters.
+
+@node Egrep Program, Id Program, Cut Program, Clones
+@subsection Searching for Regular Expressions in Files
+
+@cindex @code{egrep} utility
+The @code{egrep} utility searches files for patterns. It uses regular
+expressions that are almost identical to those available in @code{awk}
+(@pxref{Regexp Constants, ,Regular Expression Constants}). It is used this way:
+
+@example
+egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{}
+@end example
+
+The @var{pattern} is a regexp.
+In typical usage, the regexp is quoted to prevent the shell from expanding
+any of the special characters as file name wildcards.
+Normally, @code{egrep} prints the
+lines that matched. If multiple file names are provided on the command
+line, each output line is preceded by the name of the file and a colon.
+
+@c NEEDED
+@page
+The options are:
+
+@table @code
+@item -c
+Print out a count of the lines that matched the pattern, instead of the
+lines themselves.
+
+@item -s
+Be silent. No output is produced, and the exit value indicates whether
+or not the pattern was matched.
+
+@item -v
+Invert the sense of the test. @code{egrep} prints the lines that do
+@emph{not} match the pattern, and exits successfully if the pattern was not
+matched.
+
+@item -i
+Ignore case distinctions in both the pattern and the input data.
+
+@item -l
+Only print the names of the files that matched, not the lines that matched.
+
+@item -e @var{pattern}
+Use @var{pattern} as the regexp to match. The purpose of the @samp{-e}
+option is to allow patterns that start with a @samp{-}.
+@end table
+
+This version uses the @code{getopt} library function
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the file transition library program
+(@pxref{Filetrans Function, ,Noting Data File Boundaries}).
+
+The program begins with a descriptive comment, and then a @code{BEGIN} rule
+that processes the command line arguments with @code{getopt}. The @samp{-i}
+(ignore case) option is particularly easy with @code{gawk}; we just use the
+@code{IGNORECASE} built-in variable
+(@pxref{Built-in Variables}).
+
+@findex egrep.awk
+@example
+@c @group
+@c file eg/prog/egrep.awk
+# egrep.awk --- simulate egrep in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -c count of lines
+# -s silent - use exit value
+# -v invert test, success if no match
+# -i ignore case
+# -l print filenames only
+# -e argument is pattern
+
+BEGIN @{
+ while ((c = getopt(ARGC, ARGV, "ce:svil")) != -1) @{
+ if (c == "c")
+ count_only++
+ else if (c == "s")
+ no_print++
+ else if (c == "v")
+ invert++
+ else if (c == "i")
+ IGNORECASE = 1
+ else if (c == "l")
+ filenames_only++
+ else if (c == "e")
+ pattern = Optarg
+ else
+ usage()
+ @}
+@c endfile
+@c @end group
+@end example
+
+Next comes the code that handles the @code{egrep} specific behavior. If no
+pattern was supplied with @samp{-e}, the first non-option on the command
+line is used. The @code{awk} command line arguments up to @code{ARGV[Optind]}
+are cleared, so that @code{awk} won't try to process them as files. If no
+files were specified, the standard input is used, and if multiple files were
+specified, we make sure to note this so that the file names can precede the
+matched lines in the output.
+
+The last two lines are commented out, since they are not needed in
+@code{gawk}. They should be uncommented if you have to use another version
+of @code{awk}.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+ if (pattern == "")
+ pattern = ARGV[Optind++]
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+ if (Optind >= ARGC) @{
+ ARGV[1] = "-"
+ ARGC = 2
+ @} else if (ARGC - Optind > 1)
+ do_filenames++
+
+# if (IGNORECASE)
+# pattern = tolower(pattern)
+@}
+@c endfile
+@c @end group
+@end example
+
+The next set of lines should be uncommented if you are not using
+@code{gawk}. This rule translates all the characters in the input line
+into lower-case if the @samp{-i} option was specified. The rule is
+commented out since it is not necessary with @code{gawk}.
+@c bug: if a match happens, we output the translated line, not the original
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+#@{
+# if (IGNORECASE)
+# $0 = tolower($0)
+#@}
+@c endfile
+@c @end group
+@end example
+
+The @code{beginfile} function is called by the rule in @file{ftrans.awk}
+when each new file is processed. In this case, it is very simple; all it
+does is initialize a variable @code{fcount} to zero. @code{fcount} tracks
+how many lines in the current file matched the pattern.
+
+@example
+@group
+@c file eg/prog/egrep.awk
+function beginfile(junk)
+@{
+ fcount = 0
+@}
+@c endfile
+@end group
+@end example
+
+The @code{endfile} function is called after each file has been processed.
+It is used only when the user wants a count of the number of lines that
+matched. @code{no_print} will be true only if the exit status is desired.
+@code{count_only} will be true if line counts are desired. @code{egrep}
+will therefore only print line counts if printing and counting are enabled.
+The output format must be adjusted depending upon the number of files to be
+processed. Finally, @code{fcount} is added to @code{total}, so that we
+know how many lines altogether matched the pattern.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+function endfile(file)
+@{
+ if (! no_print && count_only)
+ if (do_filenames)
+ print file ":" fcount
+ else
+ print fcount
+
+ total += fcount
+@}
+@c endfile
+@c @end group
+@end example
+
+This rule does most of the work of matching lines. The variable
+@code{matches} will be true if the line matched the pattern. If the user
+wants lines that did not match, the sense of @code{matches} is inverted
+using the @samp{!} operator. @code{fcount} is incremented with the value of
+@code{matches}, which will be either one or zero, depending upon a
+successful or unsuccessful match. If the line did not match, the
+@code{next} statement just moves on to the next record.
+
+There are several optimizations for performance in the following few lines
+of code. If the user only wants exit status (@code{no_print} is true), and
+we don't have to count lines, then it is enough to know that one line in
+this file matched, and we can skip on to the next file with @code{nextfile}.
+Along similar lines, if we are only printing file names, and we
+don't need to count lines, we can print the file name, and then skip to the
+next file with @code{nextfile}.
+
+Finally, each line is printed, with a leading filename and colon if
+necessary.
+
+@ignore
+2e: note, probably better to recode the last few lines as
+ if (! count_only) @{
+ if (no_print)
+ nextfile
+
+ if (filenames_only) @{
+ print FILENAME
+ nextfile
+ @}
+
+ if (do_filenames)
+ print FILENAME ":" $0
+ else
+ print
+ @}
+@end ignore
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+@{
+ matches = ($0 ~ pattern)
+ if (invert)
+ matches = ! matches
+
+ fcount += matches # 1 or 0
+
+@group
+ if (! matches)
+ next
+@end group
+
+ if (no_print && ! count_only)
+ nextfile
+
+ if (filenames_only && ! count_only) @{
+ print FILENAME
+ nextfile
+ @}
+
+ if (do_filenames && ! count_only)
+ print FILENAME ":" $0
+ else if (! count_only)
+ print
+@}
+@c endfile
+@c @end group
+@end example
+
+@c @strong{Exercise}: rearrange the code inside @samp{if (! count_only)}.
+
+The @code{END} rule takes care of producing the correct exit status. If
+there were no matches, the exit status is one, otherwise it is zero.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+END \
+@{
+ if (total == 0)
+ exit 1
+ exit 0
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{usage} function prints a usage message in case of invalid options
+and then exits.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+function usage( e)
+@{
+ e = "Usage: egrep [-csvil] [-e pat] [files ...]"
+ print e > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+The variable @code{e} is used so that the function fits nicely
+on the printed page.
+
+@cindex backslash continuation
+Just a note on programming style. You may have noticed that the @code{END}
+rule uses backslash continuation, with the open brace on a line by
+itself. This is so that it more closely resembles the way functions
+are written. Many of the examples
+@iftex
+in this chapter
+@end iftex
+use this style. You can decide for yourself if you like writing
+your @code{BEGIN} and @code{END} rules this way,
+or not.
+
+@node Id Program, Split Program, Egrep Program, Clones
+@subsection Printing Out User Information
+
+@cindex @code{id} utility
+The @code{id} utility lists a user's real and effective user-id numbers,
+real and effective group-id numbers, and the user's group set, if any.
+@code{id} will only print the effective user-id and group-id if they are
+different from the real ones. If possible, @code{id} will also supply the
+corresponding user and group names. The output might look like this:
+
+@example
+$ id
+@print{} uid=2076(arnold) gid=10(staff) groups=10(staff),4(tty)
+@end example
+
+This information is exactly what is provided by @code{gawk}'s
+@file{/dev/user} special file (@pxref{Special Files, ,Special File Names in @code{gawk}}).
+However, the @code{id} utility provides a more palatable output than just a
+string of numbers.
+
+Here is a simple version of @code{id} written in @code{awk}.
+It uses the user database library functions
+(@pxref{Passwd Functions, ,Reading the User Database}),
+and the group database library functions
+(@pxref{Group Functions, ,Reading the Group Database}).
+
+The program is fairly straightforward. All the work is done in the
+@code{BEGIN} rule. The user and group id numbers are obtained from
+@file{/dev/user}. If there is no support for @file{/dev/user}, the program
+gives up.
+
+The code is repetitive. The entry in the user database for the real user-id
+number is split into parts at the @samp{:}. The name is the first field.
+Similar code is used for the effective user-id number, and the group
+numbers.
+
+@findex id.awk
+@example
+@c @group
+@c file eg/prog/id.awk
+# id.awk --- implement id in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# output is:
+# uid=12(foo) euid=34(bar) gid=3(baz) \
+# egid=5(blat) groups=9(nine),2(two),1(one)
+
+BEGIN \
+@{
+ if ((getline < "/dev/user") < 0) @{
+ err = "id: no /dev/user support - cannot run"
+ print err > "/dev/stderr"
+ exit 1
+ @}
+ close("/dev/user")
+
+ uid = $1
+ euid = $2
+ gid = $3
+ egid = $4
+
+ printf("uid=%d", uid)
+ pw = getpwuid(uid)
+@group
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+@end group
+
+ if (euid != uid) @{
+ printf(" euid=%d", euid)
+ pw = getpwuid(euid)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+ @}
+
+ printf(" gid=%d", gid)
+ pw = getgrgid(gid)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+
+ if (egid != gid) @{
+ printf(" egid=%d", egid)
+ pw = getgrgid(egid)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+ @}
+
+ if (NF > 4) @{
+ printf(" groups=");
+ for (i = 5; i <= NF; i++) @{
+ printf("%d", $i)
+ pw = getgrgid($i)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+@group
+ if (i < NF)
+ printf(",")
+@end group
+ @}
+ @}
+ print ""
+@}
+@c endfile
+@c @end group
+@end example
+
+@c exercise!!!
+@ignore
+The POSIX version of @code{id} takes arguments that control which
+information is printed. Modify this version to accept the same
+arguments and perform in the same way.
+@end ignore
+
+@node Split Program, Tee Program, Id Program, Clones
+@subsection Splitting a Large File Into Pieces
+
+@cindex @code{split} utility
+The @code{split} program splits large text files into smaller pieces. By default,
+the output files are named @file{xaa}, @file{xab}, and so on. Each file has
+1000 lines in it, with the likely exception of the last file. To change the
+number of lines in each file, you supply a number on the command line
+preceded with a minus, e.g., @samp{-500} for files with 500 lines in them
+instead of 1000. To change the name of the output files to something like
+@file{myfileaa}, @file{myfileab}, and so on, you supply an additional
+argument that specifies the filename.
+
+Here is a version of @code{split} in @code{awk}. It uses the @code{ord} and
+@code{chr} functions presented in
+@ref{Ordinal Functions, ,Translating Between Characters and Numbers}.
+
+The program first sets its defaults, and then tests to make sure there are
+not too many arguments. It then looks at each argument in turn. The
+first argument could be a minus followed by a number. If it is, this happens
+to look like a negative number, so it is made positive, and that is the
+count of lines. The data file name is skipped over, and the final argument
+is used as the prefix for the output file names.
+
+@findex split.awk
+@example
+@c @group
+@c file eg/prog/split.awk
+# split.awk --- do split in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# usage: split [-num] [file] [outname]
+
+BEGIN @{
+ outfile = "x" # default
+ count = 1000
+ if (ARGC > 4)
+ usage()
+
+ i = 1
+ if (ARGV[i] ~ /^-[0-9]+$/) @{
+ count = -ARGV[i]
+ ARGV[i] = ""
+ i++
+ @}
+ # test argv in case reading from stdin instead of file
+ if (i in ARGV)
+ i++ # skip data file name
+ if (i in ARGV) @{
+ outfile = ARGV[i]
+ ARGV[i] = ""
+ @}
+
+ s1 = s2 = "a"
+ out = (outfile s1 s2)
+@}
+@c endfile
+@c @end group
+@end example
+
+The next rule does most of the work. @code{tcount} (temporary count) tracks
+how many lines have been printed to the output file so far. If it is greater
+than @code{count}, it is time to close the current file and start a new one.
+@code{s1} and @code{s2} track the current suffixes for the file name. If
+they are both @samp{z}, the file is just too big. Otherwise, if only
+@code{s2} is @samp{z}, @code{s1} moves to the next letter in the alphabet
+and @code{s2} starts over again at @samp{a}. In the remaining case,
+@code{s2} simply moves to the next letter in the alphabet.
+
+@example
+@c @group
+@c file eg/prog/split.awk
+@{
+ if (++tcount > count) @{
+ close(out)
+ if (s2 == "z") @{
+ if (s1 == "z") @{
+ printf("split: %s is too large to split\n", \
+ FILENAME) > "/dev/stderr"
+ exit 1
+ @}
+ s1 = chr(ord(s1) + 1)
+ s2 = "a"
+ @} else
+ s2 = chr(ord(s2) + 1)
+ out = (outfile s1 s2)
+ tcount = 1
+ @}
+ print > out
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{usage} function simply prints an error message and exits.
+
+@example
+@c @group
+@c file eg/prog/split.awk
+function usage( e)
+@{
+ e = "usage: split [-num] [file] [outname]"
+ print e > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+@noindent
+The variable @code{e} is used so that the function
+fits nicely on the
+@iftex
+page.
+@end iftex
+@ifinfo
+screen.
+@end ifinfo
+
+This program is a bit sloppy; it relies on @code{awk} to close the last file
+for it automatically, instead of doing it in an @code{END} rule.
+
+@node Tee Program, Uniq Program, Split Program, Clones
+@subsection Duplicating Output Into Multiple Files
+
+@cindex @code{tee} utility
+The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies
+its standard input to its standard output, and also duplicates it to the
+files named on the command line. Its usage is:
+
+@example
+tee @r{[}-a@r{]} file @dots{}
+@end example
+
+The @samp{-a} option tells @code{tee} to append to the named files, instead of
+truncating them and starting over.
+
+The @code{BEGIN} rule first makes a copy of all the command line arguments,
+into an array named @code{copy}.
+@code{ARGV[0]} is not copied, since it is not needed.
+@code{tee} cannot use @code{ARGV} directly, since @code{awk} will attempt to
+process each file named in @code{ARGV} as input data.
+
+If the first argument is @samp{-a}, then the flag variable
+@code{append} is set to true, and both @code{ARGV[1]} and
+@code{copy[1]} are deleted. If @code{ARGC} is less than two, then no file
+names were supplied, and @code{tee} prints a usage message and exits.
+Finally, @code{awk} is forced to read the standard input by setting
+@code{ARGV[1]} to @code{"-"}, and @code{ARGC} to two.
+
+@c 2e: the `ARGC--' in the `if (ARGV[1] == "-a")' isn't needed.
+
+@findex tee.awk
+@example
+@group
+@c file eg/prog/tee.awk
+# tee.awk --- tee in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+# Revised December 1995
+@end group
+
+@group
+BEGIN \
+@{
+ for (i = 1; i < ARGC; i++)
+ copy[i] = ARGV[i]
+@end group
+
+@group
+ if (ARGV[1] == "-a") @{
+ append = 1
+ delete ARGV[1]
+ delete copy[1]
+ ARGC--
+ @}
+@end group
+@group
+ if (ARGC < 2) @{
+ print "usage: tee [-a] file ..." > "/dev/stderr"
+ exit 1
+ @}
+@end group
+@group
+ ARGV[1] = "-"
+ ARGC = 2
+@}
+@c endfile
+@end group
+@end example
+
+The single rule does all the work. Since there is no pattern, it is
+executed for each line of input. The body of the rule simply prints the
+line into each file on the command line, and then to the standard output.
+
+@example
+@group
+@c file eg/prog/tee.awk
+@{
+ # moving the if outside the loop makes it run faster
+ if (append)
+ for (i in copy)
+ print >> copy[i]
+ else
+ for (i in copy)
+ print > copy[i]
+ print
+@}
+@c endfile
+@end group
+@end example
+
+It would have been possible to code the loop this way:
+
+@example
+for (i in copy)
+ if (append)
+ print >> copy[i]
+ else
+ print > copy[i]
+@end example
+
+@noindent
+This is more concise, but it is also less efficient. The @samp{if} is
+tested for each record and for each output file. By duplicating the loop
+body, the @samp{if} is only tested once for each input record. If there are
+@var{N} input records and @var{M} output files, the first method only
+executes @var{N} @samp{if} statements, while the second would execute
+@var{N}@code{*}@var{M} @samp{if} statements.
+
+Finally, the @code{END} rule cleans up, by closing all the output files.
+
+@example
+@c @group
+@c file eg/prog/tee.awk
+END \
+@{
+ for (i in copy)
+ close(copy[i])
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Uniq Program, Wc Program, Tee Program, Clones
+@subsection Printing Non-duplicated Lines of Text
+
+@cindex @code{uniq} utility
+The @code{uniq} utility reads sorted lines of data on its standard input,
+and (by default) removes duplicate lines. In other words, only unique lines
+are printed, hence the name. @code{uniq} has a number of options. The usage is:
+
+@example
+uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]}
+@end example
+
+The option meanings are:
+
+@table @code
+@item -d
+Only print repeated lines.
+
+@item -u
+Only print non-repeated lines.
+
+@item -c
+Count lines. This option overrides @samp{-d} and @samp{-u}. Both repeated
+and non-repeated lines are counted.
+
+@item -@var{n}
+Skip @var{n} fields before comparing lines. The definition of fields
+is similar to @code{awk}'s default: non-whitespace characters separated
+by runs of spaces and/or tabs.
+
+@item +@var{n}
+Skip @var{n} characters before comparing lines. Any fields specified with
+@samp{-@var{n}} are skipped first.
+
+@item @var{input file}
+Data is read from the input file named on the command line, instead of from
+the standard input.
+
+@item @var{output file}
+The generated output is sent to the named output file, instead of to the
+standard output.
+@end table
+
+Normally @code{uniq} behaves as if both the @samp{-d} and @samp{-u} options
+had been provided.
+
+Here is an @code{awk} implementation of @code{uniq}. It uses the
+@code{getopt} library function
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the @code{join} library function
+(@pxref{Join Function, ,Merging an Array Into a String}).
+
+The program begins with a @code{usage} function and then a brief outline of
+the options and their meanings in a comment.
+
+The @code{BEGIN} rule deals with the command line arguments and options. It
+uses a trick to get @code{getopt} to handle options of the form @samp{-25},
+treating such an option as the option letter @samp{2} with an argument of
+@samp{5}. If indeed two or more digits were supplied (@code{Optarg} looks
+like a number), @code{Optarg} is
+concatenated with the option digit, and then the result is added to zero to make
+it into a number. If there is only one digit in the option, then
+@code{Optarg} is not needed, and @code{Optind} must be decremented so that
+@code{getopt} will process it next time. This code is admittedly a bit
+tricky.
+
+If no options were supplied, then the default is taken, to print both
+repeated and non-repeated lines. The output file, if provided, is assigned
+to @code{outputfile}. Earlier, @code{outputfile} was initialized to the
+standard output, @file{/dev/stdout}.
+
+@findex uniq.awk
+@example
+@c @group
+@c file eg/prog/uniq.awk
+# uniq.awk --- do uniq in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+@group
+function usage( e)
+@{
+ e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
+ print e > "/dev/stderr"
+ exit 1
+@}
+@end group
+
+@group
+# -c count lines. overrides -d and -u
+# -d only repeated lines
+# -u only non-repeated lines
+# -n skip n fields
+# +n skip n characters, skip fields first
+@end group
+
+BEGIN \
+@{
+ count = 1
+ outputfile = "/dev/stdout"
+ opts = "udc0:1:2:3:4:5:6:7:8:9:"
+ while ((c = getopt(ARGC, ARGV, opts)) != -1) @{
+ if (c == "u")
+ non_repeated_only++
+ else if (c == "d")
+ repeated_only++
+ else if (c == "c")
+ do_count++
+ else if (index("0123456789", c) != 0) @{
+ # getopt requires args to options
+ # this messes us up for things like -5
+ if (Optarg ~ /^[0-9]+$/)
+ fcount = (c Optarg) + 0
+ else @{
+ fcount = c + 0
+ Optind--
+ @}
+ @} else
+ usage()
+ @}
+
+ if (ARGV[Optind] ~ /^\+[0-9]+$/) @{
+ charcount = substr(ARGV[Optind], 2) + 0
+ Optind++
+ @}
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ if (repeated_only == 0 && non_repeated_only == 0)
+ repeated_only = non_repeated_only = 1
+
+@group
+ if (ARGC - Optind == 2) @{
+ outputfile = ARGV[ARGC - 1]
+ ARGV[ARGC - 1] = ""
+ @}
+@}
+@c endfile
+@end group
+@end example
+
+The following function, @code{are_equal}, compares the current line,
+@code{$0}, to the
+previous line, @code{last}. It handles skipping fields and characters.
+
+If no field count and no character count were specified, @code{are_equal}
+simply returns one or zero depending upon the result of a simple string
+comparison of @code{last} and @code{$0}. Otherwise, things get more
+complicated.
+
+If fields have to be skipped, each line is broken into an array using
+@code{split}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+and then the desired fields are joined back into a line using @code{join}.
+The joined lines are stored in @code{clast} and @code{cline}.
+If no fields are skipped, @code{clast} and @code{cline} are set to
+@code{last} and @code{$0} respectively.
+
+Finally, if characters are skipped, @code{substr} is used to strip off the
+leading @code{charcount} characters in @code{clast} and @code{cline}. The
+two strings are then compared, and @code{are_equal} returns the result.
+
+@example
+@c @group
+@c file eg/prog/uniq.awk
+function are_equal( n, m, clast, cline, alast, aline)
+@{
+ if (fcount == 0 && charcount == 0)
+ return (last == $0)
+
+ if (fcount > 0) @{
+ n = split(last, alast)
+ m = split($0, aline)
+ clast = join(alast, fcount+1, n)
+ cline = join(aline, fcount+1, m)
+ @} else @{
+ clast = last
+ cline = $0
+ @}
+ if (charcount) @{
+ clast = substr(clast, charcount + 1)
+ cline = substr(cline, charcount + 1)
+ @}
+
+ return (clast == cline)
+@}
+@c endfile
+@c @end group
+@end example
+
+The following two rules are the body of the program. The first one is
+executed only for the very first line of data. It sets @code{last} equal to
+@code{$0}, so that subsequent lines of text have something to be compared to.
+
+The second rule does the work. The variable @code{equal} will be one or zero
+depending upon the results of @code{are_equal}'s comparison. If @code{uniq}
+is counting repeated lines, then the @code{count} variable is incremented if
+the lines are equal. Otherwise the line is printed and @code{count} is
+reset, since the two lines are not equal.
+
+If @code{uniq} is not counting, @code{count} is incremented if the lines are
+equal. Otherwise, if @code{uniq} is counting repeated lines, and more than
+one line has been seen, or if @code{uniq} is counting non-repeated lines,
+and only one line has been seen, then the line is printed, and @code{count}
+is reset.
+
+Finally, similar logic is used in the @code{END} rule to print the final
+line of input data.
+
+@example
+@c @group
+@c file eg/prog/uniq.awk
+@group
+NR == 1 @{
+ last = $0
+ next
+@}
+@end group
+
+@{
+ equal = are_equal()
+
+ if (do_count) @{ # overrides -d and -u
+ if (equal)
+ count++
+ else @{
+ printf("%4d %s\n", count, last) > outputfile
+ last = $0
+ count = 1 # reset
+ @}
+ next
+ @}
+
+ if (equal)
+ count++
+ else @{
+ if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+ last = $0
+ count = 1
+ @}
+@}
+
+@group
+END @{
+ if (do_count)
+ printf("%4d %s\n", count, last) > outputfile
+ else if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+@}
+@end group
+@c endfile
+@c @end group
+@end example
+
+@node Wc Program, , Uniq Program, Clones
+@subsection Counting Things
+
+@cindex @code{wc} utility
+The @code{wc} (word count) utility counts lines, words, and characters in
+one or more input files. Its usage is:
+
+@example
+wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]}
+@end example
+
+If no files are specified on the command line, @code{wc} reads its standard
+input. If there are multiple files, it will also print total counts for all
+the files. The options and their meanings are:
+
+@table @code
+@item -l
+Only count lines.
+
+@item -w
+Only count words.
+A ``word'' is a contiguous sequence of non-whitespace characters, separated
+by spaces and/or tabs. Happily, this is the normal way @code{awk} separates
+fields in its input data.
+
+@item -c
+Only count characters.
+@end table
+
+Implementing @code{wc} in @code{awk} is particularly elegant, since
+@code{awk} does a lot of the work for us; it splits lines into words (i.e.@:
+fields) and counts them, it counts lines (i.e.@: records) for us, and it can
+easily tell us how long a line is.
+
+This version uses the @code{getopt} library function
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the file transition functions
+(@pxref{Filetrans Function, ,Noting Data File Boundaries}).
+
+This version has one major difference from traditional versions of @code{wc}.
+Our version always prints the counts in the order lines, words,
+and characters. Traditional versions note the order of the @samp{-l},
+@samp{-w}, and @samp{-c} options on the command line, and print the counts
+in that order.
+
+The @code{BEGIN} rule does the argument processing.
+The variable @code{print_total} will
+be true if more than one file was named on the command line.
+
+@findex wc.awk
+@example
+@c @group
+@c file eg/prog/wc.awk
+# wc.awk --- count lines, words, characters
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -l only count lines
+# -w only count words
+# -c only count characters
+#
+# Default is to count lines, words, characters
+
+BEGIN @{
+ # let getopt print a message about
+ # invalid options. we ignore them
+ while ((c = getopt(ARGC, ARGV, "lwc")) != -1) @{
+ if (c == "l")
+ do_lines = 1
+ else if (c == "w")
+ do_words = 1
+ else if (c == "c")
+ do_chars = 1
+ @}
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ # if no options, do all
+ if (! do_lines && ! do_words && ! do_chars)
+ do_lines = do_words = do_chars = 1
+
+ print_total = (ARGC - i > 1)
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{beginfile} function is simple; it just resets the counts of lines,
+words, and characters to zero, and saves the current file name in
+@code{fname}.
+
+The @code{endfile} function adds the current file's numbers to the running
+totals of lines, words, and characters. It then prints out those numbers
+for the file that was just read. It relies on @code{beginfile} to reset the
+numbers for the following data file.
+
+@example
+@c @group
+@c file eg/prog/wc.awk
+function beginfile(file)
+@{
+ chars = lines = words = 0
+ fname = FILENAME
+@}
+
+function endfile(file)
+@{
+ tchars += chars
+ tlines += lines
+ twords += words
+@group
+ if (do_lines)
+ printf "\t%d", lines
+@end group
+ if (do_words)
+ printf "\t%d", words
+ if (do_chars)
+ printf "\t%d", chars
+ printf "\t%s\n", fname
+@}
+@c endfile
+@c @end group
+@end example
+
+There is one rule that is executed for each line. It adds the length of the
+record to @code{chars}. It has to add one, since the newline character
+separating records (the value of @code{RS}) is not part of the record
+itself. @code{lines} is incremented for each line read, and @code{words} is
+incremented by the value of @code{NF}, the number of ``words'' on this
+line.@footnote{Examine the code in
+@ref{Filetrans Function, ,Noting Data File Boundaries}.
+Why must @code{wc} use a separate @code{lines} variable, instead of using
+the value of @code{FNR} in @code{endfile}?}
+
+Finally, the @code{END} rule simply prints the totals for all the files.
+
+@example
+@c @group
+@c file eg/prog/wc.awk
+# do per line
+@{
+ chars += length($0) + 1 # get newline
+ lines++
+ words += NF
+@}
+
+END @{
+ if (print_total) @{
+ if (do_lines)
+ printf "\t%d", tlines
+ if (do_words)
+ printf "\t%d", twords
+ if (do_chars)
+ printf "\t%d", tchars
+ print "\ttotal"
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Miscellaneous Programs, , Clones, Sample Programs
+@section A Grab Bag of @code{awk} Programs
+
+This section is a large ``grab bag'' of miscellaneous programs.
+We hope you find them both interesting and enjoyable.
+
+@menu
+* Dupword Program:: Finding duplicated words in a document.
+* Alarm Program:: An alarm clock.
+* Translate Program:: A program similar to the @code{tr} utility.
+* Labels Program:: Printing mailing labels.
+* Word Sorting:: A program to produce a word usage count.
+* History Sorting:: Eliminating duplicate entries from a history
+ file.
+* Extract Program:: Pulling out programs from Texinfo source
+ files.
+* Simple Sed:: A Simple Stream Editor.
+* Igawk Program:: A wrapper for @code{awk} that includes files.
+@end menu
+
+@node Dupword Program, Alarm Program, Miscellaneous Programs, Miscellaneous Programs
+@subsection Finding Duplicated Words in a Document
+
+A common error when writing large amounts of prose is to accidentally
+duplicate words. Often you will see this in text as something like ``the
+the program does the following @dots{}.'' When the text is on-line, often
+the duplicated words occur at the end of one line and the beginning of
+another, making them very difficult to spot.
+@c as here!
+
+This program, @file{dupword.awk}, scans through a file one line at a time,
+and looks for adjacent occurrences of the same word. It also saves the last
+word on a line (in the variable @code{prev}) for comparison with the first
+word on the next line.
+
+The first statement makes sure that the line is all lower-case, so that,
+for example,
+``The'' and ``the'' compare equal to each other. The second statement
+removes all non-alphanumeric and non-whitespace characters from the line, so
+that punctuation does not affect the comparison either. This sometimes
+leads to reports of duplicated words that really are different, but this is
+unusual.
+
+@c FIXME: add check for $i != ""
+@findex dupword.awk
+@example
+@group
+@c file eg/prog/dupword.awk
+# dupword --- find duplicate words in text
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# December 1991
+
+@{
+ $0 = tolower($0)
+ gsub(/[^A-Za-z0-9 \t]/, "");
+ if ($1 == prev)
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $1)
+ for (i = 2; i <= NF; i++)
+ if ($i == $(i-1))
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $i)
+ prev = $NF
+@}
+@c endfile
+@end group
+@end example
+
+@node Alarm Program, Translate Program, Dupword Program, Miscellaneous Programs
+@subsection An Alarm Clock Program
+
+The following program is a simple ``alarm clock'' program.
+You give it a time of day, and an optional message. At the given time,
+it prints the message on the standard output. In addition, you can give it
+the number of times to repeat the message, and also a delay between
+repetitions.
+
+This program uses the @code{gettimeofday} function from
+@ref{Gettimeofday Function, ,Managing the Time of Day}.
+
+All the work is done in the @code{BEGIN} rule. The first part is argument
+checking and setting of defaults; the delay, the count, and the message to
+print. If the user supplied a message, but it does not contain the ASCII BEL
+character (known as the ``alert'' character, @samp{\a}), then it is added to
+the message. (On many systems, printing the ASCII BEL generates some sort
+of audible alert. Thus, when the alarm goes off, the system calls attention
+to itself, in case the user is not looking at their computer or terminal.)
+
+@findex alarm.awk
+@example
+@c @group
+@c file eg/prog/alarm.awk
+# alarm --- set an alarm
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# usage: alarm time [ "message" [ count [ delay ] ] ]
+
+BEGIN \
+@{
+ # Initial argument sanity checking
+ usage1 = "usage: alarm time ['message' [count [delay]]]"
+ usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
+
+ if (ARGC < 2) @{
+ print usage1 > "/dev/stderr"
+ exit 1
+ @} else if (ARGC == 5) @{
+ delay = ARGV[4] + 0
+ count = ARGV[3] + 0
+ message = ARGV[2]
+ @} else if (ARGC == 4) @{
+ count = ARGV[3] + 0
+ message = ARGV[2]
+ @} else if (ARGC == 3) @{
+ message = ARGV[2]
+ @} else if (ARGV[1] !~ /[0-9]?[0-9]:[0-9][0-9]/) @{
+ print usage1 > "/dev/stderr"
+ print usage2 > "/dev/stderr"
+ exit 1
+ @}
+
+ # set defaults for once we reach the desired time
+ if (delay == 0)
+ delay = 180 # 3 minutes
+ if (count == 0)
+ count = 5
+@group
+ if (message == "")
+ message = sprintf("\aIt is now %s!\a", ARGV[1])
+ else if (index(message, "\a") == 0)
+ message = "\a" message "\a"
+@end group
+@c endfile
+@end example
+
+The next section of code turns the alarm time into hours and minutes,
+and converts it if necessary to a 24-hour clock. Then it turns that
+time into a count of the seconds since midnight. Next it turns the current
+time into a count of seconds since midnight. The difference between the two
+is how long to wait before setting off the alarm.
+
+@example
+@c @group
+@c file eg/prog/alarm.awk
+ # split up dest time
+ split(ARGV[1], atime, ":")
+ hour = atime[1] + 0 # force numeric
+ minute = atime[2] + 0 # force numeric
+
+ # get current broken down time
+ gettimeofday(now)
+
+ # if time given is 12-hour hours and it's after that
+ # hour, e.g., `alarm 5:30' at 9 a.m. means 5:30 p.m.,
+ # then add 12 to real hour
+ if (hour < 12 && now["hour"] > hour)
+ hour += 12
+
+ # set target time in seconds since midnight
+ target = (hour * 60 * 60) + (minute * 60)
+
+ # get current time in seconds since midnight
+ current = (now["hour"] * 60 * 60) + \
+ (now["minute"] * 60) + now["second"]
+
+ # how long to sleep for
+ naptime = target - current
+ if (naptime <= 0) @{
+ print "time is in the past!" > "/dev/stderr"
+ exit 1
+ @}
+@c endfile
+@c @end group
+@end example
+
+Finally, the program uses the @code{system} function
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output})
+to call the @code{sleep} utility. The @code{sleep} utility simply pauses
+for the given number of seconds. If the exit status is not zero,
+the program assumes that @code{sleep} was interrupted, and exits. If
+@code{sleep} exited with an OK status (zero), then the program prints the
+message in a loop, again using @code{sleep} to delay for however many
+seconds are necessary.
+
+@example
+@c @group
+@c file eg/prog/alarm.awk
+ # zzzzzz..... go away if interrupted
+ if (system(sprintf("sleep %d", naptime)) != 0)
+ exit 1
+
+ # time to notify!
+ command = sprintf("sleep %d", delay)
+ for (i = 1; i <= count; i++) @{
+ print message
+ # if sleep command interrupted, go away
+ if (system(command) != 0)
+ break
+ @}
+
+ exit 0
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Translate Program, Labels Program, Alarm Program, Miscellaneous Programs
+@subsection Transliterating Characters
+
+The system @code{tr} utility transliterates characters. For example, it is
+often used to map upper-case letters into lower-case, for further
+processing.
+
+@example
+@var{generate data} | tr '[A-Z]' '[a-z]' | @var{process data} @dots{}
+@end example
+
+You give @code{tr} two lists of characters enclosed in square brackets.
+Usually, the lists are quoted to keep the shell from attempting to do a
+filename expansion.@footnote{On older, non-POSIX systems, @code{tr} often
+does not require that the lists be enclosed in square brackets and quoted.
+This is a feature.} When processing the input, the
+first character in the first list is replaced with the first character in the
+second list, the second character in the first list is replaced with the
+second character in the second list, and so on.
+If there are more characters in the ``from'' list than in the ``to'' list,
+the last character of the ``to'' list is used for the remaining characters
+in the ``from'' list.
+
+Some time ago,
+@c early or mid-1989!
+a user proposed to us that we add a transliteration function to @code{gawk}.
+Being opposed to ``creeping featurism,'' I wrote the following program to
+prove that character transliteration could be done with a user-level
+function. This program is not as complete as the system @code{tr} utility,
+but it will do most of the job.
+
+The @code{translate} program demonstrates one of the few weaknesses of
+standard
+@code{awk}: dealing with individual characters is very painful, requiring
+repeated use of the @code{substr}, @code{index}, and @code{gsub} built-in
+functions
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@footnote{This
+program was written before @code{gawk} acquired the ability to
+split each character in a string into separate array elements.
+How might this ability simplify the program?}
+
+There are two functions. The first, @code{stranslate}, takes three
+arguments.
+
+@table @code
+@item from
+A list of characters to translate from.
+
+@item to
+A list of characters to translate to.
+
+@item target
+The string to do the translation on.
+@end table
+
+Associative arrays make the translation part fairly easy. @code{t_ar} holds
+the ``to'' characters, indexed by the ``from'' characters. Then a simple
+loop goes through @code{from}, one character at a time. For each character
+in @code{from}, if the character appears in @code{target}, @code{gsub}
+is used to change it to the corresponding @code{to} character.
+
+The @code{translate} function simply calls @code{stranslate} using @code{$0}
+as the target. The main program sets two global variables, @code{FROM} and
+@code{TO}, from the command line, and then changes @code{ARGV} so that
+@code{awk} will read from the standard input.
+
+Finally, the processing rule simply calls @code{translate} for each record.
+
+@findex translate.awk
+@example
+@c @group
+@c file eg/prog/translate.awk
+# translate --- do tr like stuff
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# August 1989
+
+# bugs: does not handle things like: tr A-Z a-z, it has
+# to be spelled out. However, if `to' is shorter than `from',
+# the last character in `to' is used for the rest of `from'.
+
+function stranslate(from, to, target, lf, lt, t_ar, i, c)
+@{
+ lf = length(from)
+ lt = length(to)
+ for (i = 1; i <= lt; i++)
+ t_ar[substr(from, i, 1)] = substr(to, i, 1)
+ if (lt < lf)
+ for (; i <= lf; i++)
+ t_ar[substr(from, i, 1)] = substr(to, lt, 1)
+ for (i = 1; i <= lf; i++) @{
+ c = substr(from, i, 1)
+ if (index(target, c) > 0)
+ gsub(c, t_ar[c], target)
+ @}
+ return target
+@}
+
+@group
+function translate(from, to)
+@{
+ return $0 = stranslate(from, to, $0)
+@}
+@end group
+
+# main program
+BEGIN @{
+ if (ARGC < 3) @{
+ print "usage: translate from to" > "/dev/stderr"
+ exit
+ @}
+ FROM = ARGV[1]
+ TO = ARGV[2]
+ ARGC = 2
+ ARGV[1] = "-"
+@}
+
+@{
+ translate(FROM, TO)
+ print
+@}
+@c endfile
+@c @end group
+@end example
+
+While it is possible to do character transliteration in a user-level
+function, it is not necessarily efficient, and we started to consider adding
+a built-in function. However, shortly after writing this program, we learned
+that the System V Release 4 @code{awk} had added the @code{toupper} and
+@code{tolower} functions. These functions handle the vast majority of the
+cases where character transliteration is necessary, and so we chose to
+simply add those functions to @code{gawk} as well, and then leave well
+enough alone.
+
+An obvious improvement to this program would be to set up the
+@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
+assumes that the ``from'' and ``to'' lists
+will never change throughout the lifetime of the program.
+
+@node Labels Program, Word Sorting, Translate Program, Miscellaneous Programs
+@subsection Printing Mailing Labels
+
+Here is a ``real world''@footnote{``Real world'' is defined as
+``a program actually used to get something done.''}
+program. This script reads lists of names and
+addresses, and generates mailing labels. Each page of labels has 20 labels
+on it, two across and ten down. The addresses are guaranteed to be no more
+than five lines of data. Each address is separated from the next by a blank
+line.
+
+The basic idea is to read 20 labels worth of data. Each line of each label
+is stored in the @code{line} array. The single rule takes care of filling
+the @code{line} array and printing the page when 20 labels have been read.
+
+The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
+@code{awk} will split records at blank lines
+(@pxref{Records, ,How Input is Split into Records}).
+It sets @code{MAXLINES} to 100, since @code{MAXLINES} is the maximum number
+of lines on the page (20 * 5 = 100).
+
+Most of the work is done in the @code{printpage} function.
+The label lines are stored sequentially in the @code{line} array. But they
+have to be printed horizontally; @code{line[1]} next to @code{line[6]},
+@code{line[2]} next to @code{line[7]}, and so on. Two loops are used to
+accomplish this. The outer loop, controlled by @code{i}, steps through
+every 10 lines of data; this is each row of labels. The inner loop,
+controlled by @code{j}, goes through the lines within the row.
+As @code{j} goes from zero to four, @samp{i+j} is the @code{j}'th line in
+the row, and @samp{i+j+5} is the entry next to it. The output ends up
+looking something like this:
+
+@example
+line 1 line 6
+line 2 line 7
+line 3 line 8
+line 4 line 9
+line 5 line 10
+@end example
+
+As a final note, at lines 21 and 61, an extra blank line is printed, to keep
+the output lined up on the labels. This is dependent on the particular
+brand of labels in use when the program was written. You will also note
+that there are two blank lines at the top and two blank lines at the bottom.
+
+The @code{END} rule arranges to flush the final page of labels; there may
+not have been an even multiple of 20 labels in the data.
+
+@findex labels.awk
+@example
+@c @group
+@c file eg/prog/labels.awk
+# labels.awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# June 1992
+
+# Program to print labels. Each label is 5 lines of data
+# that may have blank lines. The label sheets have 2
+# blank lines at the top and 2 at the bottom.
+
+BEGIN @{ RS = "" ; MAXLINES = 100 @}
+
+function printpage( i, j)
+@{
+ if (Nlines <= 0)
+ return
+
+ printf "\n\n" # header
+
+ for (i = 1; i <= Nlines; i += 10) @{
+ if (i == 21 || i == 61)
+ print ""
+ for (j = 0; j < 5; j++) @{
+ if (i + j > MAXLINES)
+ break
+ printf " %-41s %s\n", line[i+j], line[i+j+5]
+ @}
+ print ""
+ @}
+
+ printf "\n\n" # footer
+
+ for (i in line)
+ line[i] = ""
+@}
+
+# main rule
+@{
+ if (Count >= 20) @{
+ printpage()
+ Count = 0
+ Nlines = 0
+ @}
+ n = split($0, a, "\n")
+ for (i = 1; i <= n; i++)
+ line[++Nlines] = a[i]
+ for (; i <= 5; i++)
+ line[++Nlines] = ""
+ Count++
+@}
+
+END \
+@{
+ printpage()
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Word Sorting, History Sorting, Labels Program, Miscellaneous Programs
+@subsection Generating Word Usage Counts
+
+The following @code{awk} program prints
+the number of occurrences of each word in its input. It illustrates the
+associative nature of @code{awk} arrays by using strings as subscripts. It
+also demonstrates the @samp{for @var{x} in @var{array}} construction.
+Finally, it shows how @code{awk} can be used in conjunction with other
+utility programs to do a useful task of some complexity with a minimum of
+effort. Some explanations follow the program listing.
+
+@example
+awk '
+# Print list of word frequencies
+@{
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}'
+@end example
+
+The first thing to notice about this program is that it has two rules. The
+first rule, because it has an empty pattern, is executed on every line of
+the input. It uses @code{awk}'s field-accessing mechanism
+(@pxref{Fields, ,Examining Fields}) to pick out the individual words from
+the line, and the built-in variable @code{NF} (@pxref{Built-in Variables})
+to know how many fields are available.
+
+For each input word, an element of the array @code{freq} is incremented to
+reflect that the word has been seen an additional time.
+
+The second rule, because it has the pattern @code{END}, is not executed
+until the input has been exhausted. It prints out the contents of the
+@code{freq} table that has been built up inside the first action.
+
+This program has several problems that would prevent it from being
+useful by itself on real text files:
+
+@itemize @bullet
+@item
+Words are detected using the @code{awk} convention that fields are
+separated by whitespace and that other characters in the input (except
+newlines) don't have any special meaning to @code{awk}. This means that
+punctuation characters count as part of words.
+
+@item
+The @code{awk} language considers upper- and lower-case characters to be
+distinct. Therefore, @samp{bartender} and @samp{Bartender} are not treated
+as the same word. This is undesirable since, in normal text, words
+are capitalized if they begin sentences, and a frequency analyzer should not
+be sensitive to capitalization.
+
+@item
+The output does not come out in any useful order. You're more likely to be
+interested in which words occur most frequently, or having an alphabetized
+table of how frequently each word occurs.
+@end itemize
+
+The way to solve these problems is to use some of the more advanced
+features of the @code{awk} language. First, we use @code{tolower} to remove
+case distinctions. Next, we use @code{gsub} to remove punctuation
+characters. Finally, we use the system @code{sort} utility to process the
+output of the @code{awk} script. Here is the new version of
+the program:
+
+@findex wordfreq.awk
+@example
+@c file eg/prog/wordfreq.awk
+# Print list of word frequencies
+@{
+ $0 = tolower($0) # remove case distinctions
+ gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+@c endfile
+
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}
+@end example
+
+Assuming we have saved this program in a file named @file{wordfreq.awk},
+and that the data is in @file{file1}, the following pipeline
+
+@example
+awk -f wordfreq.awk file1 | sort +1 -nr
+@end example
+
+@noindent
+produces a table of the words appearing in @file{file1} in order of
+decreasing frequency.
+
+The @code{awk} program suitably massages the data and produces a word
+frequency table, which is not ordered.
+
+The @code{awk} script's output is then sorted by the @code{sort} utility and
+printed on the terminal. The options given to @code{sort} in this example
+specify to sort using the second field of each input line (skipping one field),
+that the sort keys should be treated as numeric quantities (otherwise
+@samp{15} would come before @samp{5}), and that the sorting should be done
+in descending (reverse) order.
+
+We could have even done the @code{sort} from within the program, by
+changing the @code{END} action to:
+
+@example
+@c file eg/prog/wordfreq.awk
+END @{
+ sort = "sort +1 -nr"
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word] | sort
+ close(sort)
+@}
+@c endfile
+@end example
+
+You would have to use this way of sorting on systems that do not
+have true pipes.
+
+See the general operating system documentation for more information on how
+to use the @code{sort} program.
+
+@node History Sorting, Extract Program, Word Sorting, Miscellaneous Programs
+@subsection Removing Duplicates from Unsorted Text
+
+The @code{uniq} program
+(@pxref{Uniq Program, ,Printing Non-duplicated Lines of Text}),
+removes duplicate lines from @emph{sorted} data.
+
+Suppose, however, you need to remove duplicate lines from a data file, but
+you wish to preserve the order the lines are in. A good example of
+this might be a shell history file. The history file keeps a copy of all
+the commands you have entered, and it is not unusual to repeat a command
+several times in a row. Occasionally you might wish to compact the history
+by removing duplicate entries. Yet it is desirable to maintain the order
+of the original commands.
+
+This simple program does the job. It uses two arrays. The @code{data}
+array is indexed by the text of each line.
+For each line, @code{data[$0]} is incremented.
+
+If a particular line has not
+been seen before, then @code{data[$0]} will be zero.
+In that case, the text of the line is stored in @code{lines[count]}.
+Each element of @code{lines} is a unique command, and the indices of
+@code{lines} indicate the order in which those lines were encountered.
+The @code{END} rule simply prints out the lines, in order.
+
+@cindex Rakitzis, Byron
+@findex histsort.awk
+@example
+@group
+@c file eg/prog/histsort.awk
+# histsort.awk --- compact a shell history file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Thanks to Byron Rakitzis for the general idea
+@{
+ if (data[$0]++ == 0)
+ lines[++count] = $0
+@}
+
+END @{
+ for (i = 1; i <= count; i++)
+ print lines[i]
+@}
+@c endfile
+@end group
+@end example
+
+This program also provides a foundation for generating other useful
+information. For example, using the following @code{print} statement in the
+@code{END} rule would indicate how often a particular command was used.
+
+@example
+print data[lines[i]], lines[i]
+@end example
+
+This works because @code{data[$0]} was incremented each time a line was
+seen.
+
+@node Extract Program, Simple Sed, History Sorting, Miscellaneous Programs
+@subsection Extracting Programs from Texinfo Source Files
+
+@iftex
+Both this chapter and the previous chapter
+(@ref{Library Functions, ,A Library of @code{awk} Functions}),
+present a large number of @code{awk} programs.
+@end iftex
+@ifinfo
+The nodes
+@ref{Library Functions, ,A Library of @code{awk} Functions},
+and @ref{Sample Programs, ,Practical @code{awk} Programs},
+are the top level nodes for a large number of @code{awk} programs.
+@end ifinfo
+If you wish to experiment with these programs, it is tedious to have to type
+them in by hand. Here we present a program that can extract parts of a
+Texinfo input file into separate files.
+
+This @value{DOCUMENT} is written in Texinfo, the GNU project's document
+formatting language. A single Texinfo source file can be used to produce both
+printed and on-line documentation.
+@iftex
+Texinfo is fully documented in @cite{Texinfo---The GNU Documentation Format},
+available from the Free Software Foundation.
+@end iftex
+@ifinfo
+The Texinfo language is described fully, starting with
+@ref{Top, , Introduction, texi, Texinfo---The GNU Documentation Format}.
+@end ifinfo
+
+For our purposes, it is enough to know three things about Texinfo input
+files.
+
+@itemize @bullet
+@item
+The ``at'' symbol, @samp{@@}, is special in Texinfo, much like @samp{\} in C
+or @code{awk}. Literal @samp{@@} symbols are represented in Texinfo source
+files as @samp{@@@@}.
+
+@item
+Comments start with either @samp{@@c} or @samp{@@comment}.
+The file extraction program will work by using special comments that start
+at the beginning of a line.
+
+@item
+Example text that should not be split across a page boundary is bracketed
+between lines containing @samp{@@group} and @samp{@@end group} commands.
+@end itemize
+
+The following program, @file{extract.awk}, reads through a Texinfo source
+file, and does two things, based on the special comments.
+Upon seeing @samp{@w{@@c system @dots{}}},
+it runs a command, by extracting the command text from the
+control line and passing it on to the @code{system} function
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+Upon seeing @samp{@@c file @var{filename}}, each subsequent line is sent to
+the file @var{filename}, until @samp{@@c endfile} is encountered.
+The rules in @file{extract.awk} will match either @samp{@@c} or
+@samp{@@comment} by letting the @samp{omment} part be optional.
+Lines containing @samp{@@group} and @samp{@@end group} are simply removed.
+@file{extract.awk} uses the @code{join} library function
+(@pxref{Join Function, ,Merging an Array Into a String}).
+
+The example programs in the on-line Texinfo source for @cite{@value{TITLE}}
+(@file{gawk.texi}) have all been bracketed inside @samp{file},
+and @samp{endfile} lines. The @code{gawk} distribution uses a copy of
+@file{extract.awk} to extract the sample
+programs and install many of them in a standard directory, where
+@code{gawk} can find them.
+
+@file{extract.awk} begins by setting @code{IGNORECASE} to one, so that
+mixed upper-case and lower-case letters in the directives won't matter.
+
+The first rule handles calling @code{system}, checking that a command was
+given (@code{NF} is at least three), and also checking that the command
+exited with a zero exit status, signifying OK.
+
+@findex extract.awk
+@example
+@c @group
+@c file eg/prog/extract.awk
+# extract.awk --- extract files and run programs
+# from texinfo files
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN @{ IGNORECASE = 1 @}
+
+@group
+/^@@c(omment)?[ \t]+system/ \
+@{
+ if (NF < 3) @{
+ e = (FILENAME ":" FNR)
+ e = (e ": badly formed `system' line")
+ print e > "/dev/stderr"
+ next
+ @}
+ $1 = ""
+ $2 = ""
+ stat = system($0)
+ if (stat != 0) @{
+ e = (FILENAME ":" FNR)
+ e = (e ": warning: system returned " stat)
+ print e > "/dev/stderr"
+ @}
+@}
+@end group
+@c endfile
+@end example
+
+@noindent
+The variable @code{e} is used so that the rule
+fits nicely on the
+@iftex
+page.
+@end iftex
+@ifinfo
+screen.
+@end ifinfo
+
+The second rule handles moving data into files. It verifies that a file
+name was given in the directive. If the file named is not the current file,
+then the current file is closed. This means that an @samp{@@c endfile} was
+not given for that file. (We should probably print a diagnostic in this
+case, although at the moment we do not.)
+
+The @samp{for} loop does the work. It reads lines using @code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+For an unexpected end of file, it calls the @code{@w{unexpected_eof}}
+function. If the line is an ``endfile'' line, then it breaks out of
+the loop.
+If the line is an @samp{@@group} or @samp{@@end group} line, then it
+ignores it, and goes on to the next line.
+
+Most of the work is in the following few lines. If the line has no @samp{@@}
+symbols, it can be printed directly. Otherwise, each leading @samp{@@} must be
+stripped off.
+
+To remove the @samp{@@} symbols, the line is split into separate elements of
+the array @code{a}, using the @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Each element of @code{a} that is empty indicates two successive @samp{@@}
+symbols in the original line. For each two empty elements (@samp{@@@@} in
+the original file), we have to add back in a single @samp{@@} symbol.
+
+When the processing of the array is finished, @code{join} is called with the
+value of @code{SUBSEP}, to rejoin the pieces back into a single
+line. That line is then printed to the output file.
+
+@example
+@c @group
+@c file eg/prog/extract.awk
+@group
+/^@@c(omment)?[ \t]+file/ \
+@{
+ if (NF != 3) @{
+ e = (FILENAME ":" FNR ": badly formed `file' line")
+ print e > "/dev/stderr"
+ next
+ @}
+@end group
+ if ($3 != curfile) @{
+ if (curfile != "")
+ close(curfile)
+ curfile = $3
+ @}
+
+ for (;;) @{
+ if ((getline line) <= 0)
+ unexpected_eof()
+ if (line ~ /^@@c(omment)?[ \t]+endfile/)
+ break
+ else if (line ~ /^@@(end[ \t]+)?group/)
+ continue
+ if (index(line, "@@") == 0) @{
+ print line > curfile
+ continue
+ @}
+ n = split(line, a, "@@")
+@group
+ # if a[1] == "", means leading @@,
+ # don't add one back in.
+@end group
+ for (i = 2; i <= n; i++) @{
+ if (a[i] == "") @{ # was an @@@@
+ a[i] = "@@"
+ if (a[i+1] == "")
+ i++
+ @}
+ @}
+ print join(a, 1, n, SUBSEP) > curfile
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+An important thing to note is the use of the @samp{>} redirection.
+Output done with @samp{>} only opens the file once; it stays open and
+subsequent output is appended to the file
+(@pxref{Redirection, , Redirecting Output of @code{print} and @code{printf}}).
+This allows us to easily mix program text and explanatory prose for the same
+sample source file (as has been done here!) without any hassle. The file is
+only closed when a new data file name is encountered, or at the end of the
+input file.
+
+Finally, the function @code{@w{unexpected_eof}} prints an appropriate
+error message and then exits.
+
+The @code{END} rule handles the final cleanup, closing the open file.
+
+@example
+@c file eg/prog/extract.awk
+@group
+function unexpected_eof()
+@{
+ printf("%s:%d: unexpected EOF or error\n", \
+ FILENAME, FNR) > "/dev/stderr"
+ exit 1
+@}
+@end group
+
+END @{
+ if (curfile)
+ close(curfile)
+@}
+@c endfile
+@end example
+
+@node Simple Sed, Igawk Program, Extract Program, Miscellaneous Programs
+@subsection A Simple Stream Editor
+
+@cindex @code{sed} utility
+The @code{sed} utility is a ``stream editor,'' a program that reads a
+stream of data, makes changes to it, and passes the modified data on.
+It is often used to make global changes to a large file, or to a stream
+of data generated by a pipeline of commands.
+
+While @code{sed} is a complicated program in its own right, its most common
+use is to perform global substitutions in the middle of a pipeline:
+
+@example
+command1 < orig.data | sed 's/old/new/g' | command2 > result
+@end example
+
+Here, the @samp{s/old/new/g} tells @code{sed} to look for the regexp
+@samp{old} on each input line, and replace it with the text @samp{new},
+globally (i.e.@: all the occurrences on a line). This is similar to
+@code{awk}'s @code{gsub} function
+(@pxref{String Functions, , Built-in Functions for String Manipulation}).
+
+The following program, @file{awksed.awk}, accepts at least two command line
+arguments: the pattern to look for and the text to replace it with. Any
+additional arguments are treated as data file names to process. If none
+are provided, the standard input is used.
+
+@cindex Brennan, Michael
+@cindex @code{awksed}
+@cindex simple stream editor
+@cindex stream editor, simple
+@example
+@c @group
+@c file eg/prog/awksed.awk
+# awksed.awk --- do s/foo/bar/g using just print
+# Thanks to Michael Brennan for the idea
+
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# August 1995
+
+@group
+function usage()
+@{
+ print "usage: awksed pat repl [files...]" > "/dev/stderr"
+ exit 1
+@}
+@end group
+
+BEGIN @{
+ # validate arguments
+ if (ARGC < 3)
+ usage()
+
+ RS = ARGV[1]
+ ORS = ARGV[2]
+
+ # don't use arguments as files
+ ARGV[1] = ARGV[2] = ""
+@}
+
+# look ma, no hands!
+@{
+ if (RT == "")
+ printf "%s", $0
+ else
+ print
+@}
+@c endfile
+@c @end group
+@end example
+
+The program relies on @code{gawk}'s ability to have @code{RS} be a regexp
+and on the setting of @code{RT} to the actual text that terminated the
+record (@pxref{Records, ,How Input is Split into Records}).
+
+The idea is to have @code{RS} be the pattern to look for. @code{gawk}
+will automatically set @code{$0} to the text between matches of the pattern.
+This is text that we wish to keep, unmodified. Then, by setting @code{ORS}
+to the replacement text, a simple @code{print} statement will output the
+text we wish to keep, followed by the replacement text.
+
+There is one wrinkle to this scheme: what should happen if the last record
+doesn't end with text that matches @code{RS}? Using a @code{print}
+statement unconditionally prints the replacement text, which is not correct.
+
+However, if the file did not end in text that matches @code{RS}, @code{RT}
+will be set to the null string. In this case, we can print @code{$0} using
+@code{printf}
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+
+The @code{BEGIN} rule handles the setup, checking for the right number
+of arguments, and calling @code{usage} if there is a problem. Then it sets
+@code{RS} and @code{ORS} from the command line arguments, and sets
+@code{ARGV[1]} and @code{ARGV[2]} to the null string, so that they will
+not be treated as file names
+(@pxref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}).
+
+The @code{usage} function prints an error message and exits.
+
+Finally, the single rule handles the printing scheme outlined above,
+using @code{print} or @code{printf} as appropriate, depending upon the
+value of @code{RT}.
+
+@ignore
+Exercise: compare the performance of this version with the more
+straightforward:
+
+BEGIN {
+ pat = ARGV[1]
+ repl = ARGV[2]
+ ARGV[1] = ARGV[2] = ""
+}
+
+{ gsub(pat, repl); print }
+
+Exercise: what are the advantages and disadvantages of this version vs. sed?
+ Advantage: egrep regexps
+ speed (?)
+ Disadvantage: no & in replacement text
+
+Others?
+@end ignore
+
+@node Igawk Program, , Simple Sed, Miscellaneous Programs
+@subsection An Easy Way to Use Library Functions
+
+Using library functions in @code{awk} can be very beneficial. It
+encourages code re-use and the writing of general functions. Programs are
+smaller, and therefore clearer.
+However, using library functions is only easy when writing @code{awk}
+programs; it is painful when running them, requiring multiple @samp{-f}
+options. If @code{gawk} is unavailable, then so too is the @code{AWKPATH}
+environment variable and the ability to put @code{awk} functions into a
+library directory (@pxref{Options, ,Command Line Options}).
+
+It would be nice to be able to write programs like so:
+
+@example
+# library functions
+@@include getopt.awk
+@@include join.awk
+@dots{}
+
+# main program
+BEGIN @{
+ while ((c = getopt(ARGC, ARGV, "a:b:cde")) != -1)
+ @dots{}
+ @dots{}
+@}
+@end example
+
+The following program, @file{igawk.sh}, provides this service.
+It simulates @code{gawk}'s searching of the @code{AWKPATH} variable,
+and also allows @dfn{nested} includes; i.e.@: a file that has been included
+with @samp{@@include} can contain further @samp{@@include} statements.
+@code{igawk} will make an effort to only include files once, so that nested
+includes don't accidentally include a library function twice.
+
+@code{igawk} should behave externally just like @code{gawk}. This means it
+should accept all of @code{gawk}'s command line arguments, including the
+ability to have multiple source files specified via @samp{-f}, and the
+ability to mix command line and library source files.
+
+The program is written using the POSIX Shell (@code{sh}) command language.
+The way the program works is as follows:
+
+@enumerate
+@item
+Loop through the arguments, saving anything that doesn't represent
+@code{awk} source code for later, when the expanded program is run.
+
+@item
+For any arguments that do represent @code{awk} text, put the arguments into
+a temporary file that will be expanded. There are two cases.
+
+@enumerate a
+@item
+Literal text, provided with @samp{--source} or @samp{--source=}. This
+text is just echoed directly. The @code{echo} program will automatically
+supply a trailing newline.
+
+@item
+File names provided with @samp{-f}. We use a neat trick, and echo
+@samp{@@include @var{filename}} into the temporary file. Since the file
+inclusion program will work the way @code{gawk} does, this will get the text
+of the file included into the program at the correct point.
+@end enumerate
+
+@item
+Run an @code{awk} program (naturally) over the temporary file to expand
+@samp{@@include} statements. The expanded program is placed in a second
+temporary file.
+
+@item
+Run the expanded program with @code{gawk} and any other original command line
+arguments that the user supplied (such as the data file names).
+@end enumerate
+
+The initial part of the program turns on shell tracing if the first
+argument was @samp{debug}. Otherwise, a shell @code{trap} statement
+arranges to clean up any temporary files on program exit or upon an
+interrupt.
+
+@c 2e: For the temp file handling, go with Darrel's ig=${TMP:-/tmp}/igs.$$
+@c 2e: or something as similar as possible.
+
+The next part loops through all the command line arguments.
+There are several cases of interest.
+
+@table @code
+@item --
+This ends the arguments to @code{igawk}. Anything else should be passed on
+to the user's @code{awk} program without being evaluated.
+
+@item -W
+This indicates that the next option is specific to @code{gawk}. To make
+argument processing easier, the @samp{-W} is prepended to the
+remaining arguments and the loop continues. (This is an @code{sh}
+programming trick. Don't worry about it if you are not familiar with
+@code{sh}.)
+
+@item -v
+@itemx -F
+These are saved and passed on to @code{gawk}.
+
+@item -f
+@itemx --file
+@itemx --file=
+@itemx -Wfile=
+The file name is saved to the temporary file @file{/tmp/ig.s.$$} with an
+@samp{@@include} statement.
+The @code{sed} utility is used to remove the leading option part of the
+argument (e.g., @samp{--file=}).
+
+@item --source
+@itemx --source=
+@itemx -Wsource=
+The source text is echoed into @file{/tmp/ig.s.$$}.
+
+@item --version
+@itemx -Wversion
+@code{igawk} prints its version number, and runs @samp{gawk --version}
+to get the @code{gawk} version information, and then exits.
+@end table
+
+If none of @samp{-f}, @samp{--file}, @samp{-Wfile}, @samp{--source},
+or @samp{-Wsource} were supplied, then the first non-option argument
+should be the @code{awk} program. If there are no command line
+arguments left, @code{igawk} prints an error message and exits.
+Otherwise, the first argument is echoed into @file{/tmp/ig.s.$$}.
+
+In any case, after the arguments have been processed,
+@file{/tmp/ig.s.$$} contains the complete text of the original @code{awk}
+program.
+
+The @samp{$$} in @code{sh} represents the current process ID number.
+It is often used in shell programs to generate unique temporary file
+names. This allows multiple users to run @code{igawk} without worrying
+that the temporary file names will clash.
+
+@cindex @code{sed} utility
+Here's the program:
+
+@findex igawk.sh
+@example
+@c @group
+@c file eg/prog/igawk.sh
+#! /bin/sh
+
+# igawk --- like gawk but do @@include processing
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# July 1993
+
+if [ "$1" = debug ]
+then
+ set -x
+ shift
+else
+ # cleanup on exit, hangup, interrupt, quit, termination
+ trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15
+fi
+
+while [ $# -ne 0 ] # loop over arguments
+do
+ case $1 in
+ --) shift; break;;
+
+ -W) shift
+ set -- -W"$@@"
+ continue;;
+
+ -[vF]) opts="$opts $1 '$2'"
+ shift;;
+
+ -[vF]*) opts="$opts '$1'" ;;
+
+ -f) echo @@include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+@group
+ -f*) f=`echo "$1" | sed 's/-f//'`
+ echo @@include "$f" >> /tmp/ig.s.$$ ;;
+@end group
+
+ -?file=*) # -Wfile or --file
+ f=`echo "$1" | sed 's/-.file=//'`
+ echo @@include "$f" >> /tmp/ig.s.$$ ;;
+
+ -?file) # get arg, $2
+ echo @@include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -?source=*) # -Wsource or --source
+ t=`echo "$1" | sed 's/-.source=//'`
+ echo "$t" >> /tmp/ig.s.$$ ;;
+
+ -?source) # get arg, $2
+ echo "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -?version)
+ echo igawk: version 1.0 1>&2
+ gawk --version
+ exit 0 ;;
+
+ -[W-]*) opts="$opts '$1'" ;;
+
+ *) break;;
+ esac
+ shift
+done
+
+if [ ! -s /tmp/ig.s.$$ ]
+then
+ if [ -z "$1" ]
+ then
+ echo igawk: no program! 1>&2
+ exit 1
+ else
+ echo "$1" > /tmp/ig.s.$$
+ shift
+ fi
+fi
+
+# at this point, /tmp/ig.s.$$ has the program
+@c endfile
+@c @end group
+@end example
+
+The @code{awk} program to process @samp{@@include} directives reads through
+the program, one line at a time using @code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+The input file names and @samp{@@include} statements are managed using a
+stack. As each @samp{@@include} is encountered, the current file name is
+``pushed'' onto the stack, and the file named in the @samp{@@include}
+directive becomes
+the current file name. As each file is finished, the stack is ``popped,''
+and the previous input file becomes the current input file again.
+The process is started by making the original file the first one on the
+stack.
+
+The @code{pathto} function does the work of finding the full path to a
+file. It simulates @code{gawk}'s behavior when searching the @code{AWKPATH}
+environment variable
+(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+If a file name has a @samp{/} in it, no path search
+is done. Otherwise, the file name is concatenated with the name of each
+directory in the path, and an attempt is made to open the generated file
+name. The only way in @code{awk} to test if a file can be read is to go
+ahead and try to read it with @code{getline}; that is what @code{pathto}
+does.@footnote{On some very old versions of @code{awk}, the test
+@samp{getline junk < t} can loop forever if the file exists but is empty.
+Caveat Emptor.}
+If the file can be read, it is closed, and the file name is
+returned.
+@ignore
+An alternative way to test for the file's existence would be to call
+@samp{system("test -r " t)}, which uses the @code{test} utility to
+see if the file exists and is readable. The disadvantage to this method
+is that it requires creating an extra process, and can thus be slightly
+slower.
+@end ignore
+
+@example
+@c @group
+@c file eg/prog/igawk.sh
+gawk -- '
+# process @@include directives
+
+function pathto(file, i, t, junk)
+@{
+ if (index(file, "/") != 0)
+ return file
+
+ for (i = 1; i <= ndirs; i++) @{
+ t = (pathlist[i] "/" file)
+ if ((getline junk < t) > 0) @{
+ # found it
+ close(t)
+ return t
+ @}
+ @}
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The main program is contained inside one @code{BEGIN} rule. The first thing it
+does is set up the @code{pathlist} array that @code{pathto} uses. After
+splitting the path on @samp{:}, null elements are replaced with @code{"."},
+which represents the current directory.
+
+@example
+@group
+@c file eg/prog/igawk.sh
+BEGIN @{
+ path = ENVIRON["AWKPATH"]
+ ndirs = split(path, pathlist, ":")
+ for (i = 1; i <= ndirs; i++) @{
+ if (pathlist[i] == "")
+ pathlist[i] = "."
+ @}
+@c endfile
+@end group
+@end example
+
+The stack is initialized with @code{ARGV[1]}, which will be @file{/tmp/ig.s.$$}.
+The main loop comes next. Input lines are read in succession. Lines that
+do not start with @samp{@@include} are printed verbatim.
+
+If the line does start with @samp{@@include}, the file name is in @code{$2}.
+@code{pathto} is called to generate the full path. If it cannot, then we
+print an error message and continue.
+
+The next thing to check is if the file has been included already. The
+@code{processed} array is indexed by the full file name of each included
+file, and it tracks this information for us. If the file has been
+seen, a warning message is printed. Otherwise, the new file name is
+pushed onto the stack and processing continues.
+
+Finally, when @code{getline} encounters the end of the input file, the file
+is closed and the stack is popped. When @code{stackptr} is less than zero,
+the program is done.
+
+@example
+@c @group
+@c file eg/prog/igawk.sh
+ stackptr = 0
+ input[stackptr] = ARGV[1] # ARGV[1] is first file
+
+ for (; stackptr >= 0; stackptr--) @{
+ while ((getline < input[stackptr]) > 0) @{
+ if (tolower($1) != "@@include") @{
+ print
+ continue
+ @}
+ fpath = pathto($2)
+ if (fpath == "") @{
+ printf("igawk:%s:%d: cannot find %s\n", \
+ input[stackptr], FNR, $2) > "/dev/stderr"
+ continue
+ @}
+@group
+ if (! (fpath in processed)) @{
+ processed[fpath] = input[stackptr]
+ input[++stackptr] = fpath
+ @} else
+ print $2, "included in", input[stackptr], \
+ "already included in", \
+ processed[fpath] > "/dev/stderr"
+ @}
+@end group
+@group
+ close(input[stackptr])
+ @}
+@}' /tmp/ig.s.$$ > /tmp/ig.e.$$
+@end group
+@c endfile
+@c @end group
+@end example
+
+The last step is to call @code{gawk} with the expanded program and the original
+options and command line arguments that the user supplied. @code{gawk}'s
+exit status is passed back on to @code{igawk}'s calling program.
+
+@c this causes more problems than it solves, so leave it out.
+@ignore
+The special file @file{/dev/null} is passed as a data file to @code{gawk}
+to handle an interesting case. Suppose that the user's program only has
+a @code{BEGIN} rule, and there are no data files to read. The program should exit without reading any data
+files. However, suppose that an included library file defines an @code{END}
+rule of its own. In this case, @code{gawk} will hang, reading standard
+input. In order to avoid this, @file{/dev/null} is explicitly added to the
+command line. Reading from @file{/dev/null} always returns an immediate
+end of file indication.
+
+@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh.
+@end ignore
+
+@example
+@c @group
+@c file eg/prog/igawk.sh
+eval gawk -f /tmp/ig.e.$$ $opts -- "$@@"
+
+exit $?
+@c endfile
+@c @end group
+@end example
+
+This version of @code{igawk} represents my third attempt at this program.
+There are three key simplifications that made the program work better.
+
+@enumerate
+@item
+Using @samp{@@include} even for the files named with @samp{-f} makes building
+the initial collected @code{awk} program much simpler; all the
+@samp{@@include} processing can be done once.
+
+@item
+The @code{pathto} function doesn't try to save the line read with
+@code{getline} when testing for the file's accessibility. Trying to save
+this line for use with the main program complicates things considerably.
+@c what problem does this engender though - exercise
+@c answer, reading from "-" or /dev/stdin
+
+@item
+Using a @code{getline} loop in the @code{BEGIN} rule does it all in one
+place. It is not necessary to call out to a separate loop for processing
+nested @samp{@@include} statements.
+@end enumerate
+
+Also, this program illustrates that it is often worthwhile to combine
+@code{sh} and @code{awk} programming together. You can usually accomplish
+quite a lot, without having to resort to low-level programming in C or C++, and it
+is frequently easier to do certain kinds of string and argument manipulation
+using the shell than it is in @code{awk}.
+
+Finally, @code{igawk} shows that it is not always necessary to add new
+features to a program; they can often be layered on top. With @code{igawk},
+there is no real reason to build @samp{@@include} processing into
+@code{gawk} itself.
+
+As an additional example of this, consider the idea of having two
+files in a directory in the search path.
+
+@table @file
+@item default.awk
+This file would contain a set of default library functions, such
+as @code{getopt} and @code{assert}.
+
+@item site.awk
+This file would contain library functions that are specific to a site or
+installation, i.e.@: locally developed functions.
+Having a separate file allows @file{default.awk} to change with
+new @code{gawk} releases, without requiring the system administrator to
+update it each time by adding the local functions.
+@end table
+
+One user
+@c Karl Berry, karl@ileaf.com, 10/95
+suggested that @code{gawk} be modified to automatically read these files
+upon startup. Instead, it would be very simple to modify @code{igawk}
+to do this. Since @code{igawk} can process nested @samp{@@include}
+directives, @file{default.awk} could simply contain @samp{@@include}
+statements for the desired library functions.
+
+@c Exercise: make this change
+
+@node Language History, Gawk Summary, Sample Programs, Top
+@chapter The Evolution of the @code{awk} Language
+
+This @value{DOCUMENT} describes the GNU implementation of @code{awk}, which follows
+the POSIX specification. Many @code{awk} users are only familiar
+with the original @code{awk} implementation in Version 7 Unix.
+(This implementation was the basis for @code{awk} in Berkeley Unix,
+through 4.3--Reno. The 4.4 release of Berkeley Unix uses @code{gawk} 2.15.2
+for its version of @code{awk}.) This chapter briefly describes the
+evolution of the @code{awk} language, with cross references to other parts
+of the @value{DOCUMENT} where you can find more information.
+
+@menu
+* V7/SVR3.1:: The major changes between V7 and System V
+ Release 3.1.
+* SVR4:: Minor changes between System V Releases 3.1
+ and 4.
+* POSIX:: New features from the POSIX standard.
+* BTL:: New features from the Bell Laboratories
+ version of @code{awk}.
+* POSIX/GNU:: The extensions in @code{gawk} not in POSIX
+ @code{awk}.
+@end menu
+
+@node V7/SVR3.1, SVR4, Language History, Language History
+@section Major Changes between V7 and SVR3.1
+
+The @code{awk} language evolved considerably between the release of
+Version 7 Unix (1978) and the new version first made generally available in
+System V Release 3.1 (1987). This section summarizes the changes, with
+cross-references to further details.
+
+@itemize @bullet
+@item
+The requirement for @samp{;} to separate rules on a line
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
+
+@item
+User-defined functions, and the @code{return} statement
+(@pxref{User-defined, ,User-defined Functions}).
+
+@item
+The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}).
+
+@item
+The @code{do}-@code{while} statement
+(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}).
+
+@item
+The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand} and
+@code{srand} (@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The built-in functions @code{gsub}, @code{sub}, and @code{match}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+The built-in functions @code{close} and @code{system}
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+
+@item
+The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART},
+and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}).
+
+@item
+The conditional expression using the ternary operator @samp{?:}
+(@pxref{Conditional Exp, ,Conditional Expressions}).
+
+@item
+The exponentiation operator @samp{^}
+(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator
+form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+C-compatible operator precedence, which breaks some old @code{awk}
+programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}).
+
+@item
+Regexps as the value of @code{FS}
+(@pxref{Field Separators, ,Specifying How Fields are Separated}), and as the
+third argument to the @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+Dynamic regexps as operands of the @samp{~} and @samp{!~} operators
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).
+
+@item
+The escape sequences @samp{\b}, @samp{\f}, and @samp{\r}
+(@pxref{Escape Sequences}).
+(Some vendors have updated their old versions of @code{awk} to
+recognize @samp{\r}, @samp{\b}, and @samp{\f}, but this is not
+something you can rely on.)
+
+@item
+Redirection of input for the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+
+@item
+Multiple @code{BEGIN} and @code{END} rules
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+
+@item
+Multi-dimensional arrays
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).
+@end itemize
+
+@node SVR4, POSIX, V7/SVR3.1, Language History
+@section Changes between SVR3.1 and SVR4
+
+@cindex @code{awk} language, V.4 version
+The System V Release 4 version of Unix @code{awk} added these features
+(some of which originated in @code{gawk}):
+
+@itemize @bullet
+@item
+The @code{ENVIRON} variable (@pxref{Built-in Variables}).
+
+@item
+Multiple @samp{-f} options on the command line
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{-v} option for assigning variables before program execution begins
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{--} option for terminating command line options.
+
+@item
+The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences
+(@pxref{Escape Sequences}).
+
+@item
+A defined return value for the @code{srand} built-in function
+(@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The @code{toupper} and @code{tolower} built-in string functions
+for case translation
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+A cleaner specification for the @samp{%c} format-control letter in the
+@code{printf} function
+(@pxref{Control Letters, ,Format-Control Letters}).
+
+@item
+The ability to dynamically pass the field width and precision (@code{"%*.*d"})
+in the argument list of the @code{printf} function
+(@pxref{Control Letters, ,Format-Control Letters}).
+
+@item
+The use of regexp constants such as @code{/foo/} as expressions, where
+they are equivalent to using the matching operator, as in @samp{$0 ~ /foo/}
+(@pxref{Using Constant Regexps, ,Using Regular Expression Constants}).
+@end itemize
+
+@node POSIX, BTL, SVR4, Language History
+@section Changes between SVR4 and POSIX @code{awk}
+
+The POSIX Command Language and Utilities standard for @code{awk}
+introduced the following changes into the language:
+
+@itemize @bullet
+@item
+The use of @samp{-W} for implementation-specific options.
+
+@item
+The use of @code{CONVFMT} for controlling the conversion of numbers
+to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@item
+The concept of a numeric string, and tighter comparison rules to go
+with it (@pxref{Typing and Comparison, ,Variable Typing and Comparison Expressions}).
+
+@item
+More complete documentation of many of the previously undocumented
+features of the language.
+@end itemize
+
+The following common extensions are not permitted by the POSIX
+standard:
+
+@c IMPORTANT! Keep this list in sync with the one in node Options
+
+@itemize @bullet
+@item
+@code{\x} escape sequences are not recognized
+(@pxref{Escape Sequences}).
+
+@item
+Newlines do not act as whitespace to separate fields when @code{FS} is
+equal to a single space.
+
+@item
+The synonym @code{func} for the keyword @code{function} is not
+recognized (@pxref{Definition Syntax, ,Function Definition Syntax}).
+
+@item
+The operators @samp{**} and @samp{**=} cannot be used in
+place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
+and also @pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+Specifying @samp{-Ft} on the command line does not set the value
+of @code{FS} to be a single tab character
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@item
+The @code{fflush} built-in function is not supported
+(@pxref{I/O Functions, , Built-in Functions for Input/Output}).
+@end itemize
+
+@node BTL, POSIX/GNU, POSIX, Language History
+@section Extensions in the Bell Laboratories @code{awk}
+
+@cindex Kernighan, Brian
+Brian Kernighan, one of the original designers of Unix @code{awk},
+has made his version available via anonymous @code{ftp}
+(@pxref{Other Versions, ,Other Freely Available @code{awk} Implementations}).
+This section describes extensions in his version of @code{awk} that are
+not in POSIX @code{awk}.
+
+@itemize @bullet
+@item
+The @samp{-mf @var{NNN}} and @samp{-mr @var{NNN}} command line options
+to set the maximum number of fields, and the maximum
+record size, respectively
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @code{fflush} built-in function for flushing buffered output
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+
+@ignore
+@item
+The @code{SYMTAB} array, that allows access to the internal symbol
+table of @code{awk}. This feature is not documented, largely because
+it is somewhat shakily implemented. For instance, you cannot access arrays
+or array elements through it.
+@end ignore
+@end itemize
+
+@node POSIX/GNU, , BTL, Language History
+@section Extensions in @code{gawk} Not in POSIX @code{awk}
+
+@cindex compatibility mode
+The GNU implementation, @code{gawk}, adds a number of features.
+This section lists them in the order they were added to @code{gawk}.
+They can all be disabled with either the @samp{--traditional} or
+@samp{--posix} options
+(@pxref{Options, ,Command Line Options}).
+
+Version 2.10 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{AWKPATH} environment variable for specifying a path search for
+the @samp{-f} command line option
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @code{IGNORECASE} variable and its effects
+(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).
+
+@item
+The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
+@file{/dev/fd/@var{n}} file name interpretation
+(@pxref{Special Files, ,Special File Names in @code{gawk}}).
+@end itemize
+
+Version 2.13 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{FIELDWIDTHS} variable and its effects
+(@pxref{Constant Size, ,Reading Fixed-width Data}).
+
+@item
+The @code{systime} and @code{strftime} built-in functions for obtaining
+and printing time stamps
+(@pxref{Time Functions, ,Functions for Dealing with Time Stamps}).
+
+@item
+The @samp{-W lint} option to provide source code and run time error
+and portability checking
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{-W compat} option to turn off these extensions
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{-W posix} option for full POSIX compliance
+(@pxref{Options, ,Command Line Options}).
+@end itemize
+
+Version 2.14 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{next file} statement for skipping to the next data file
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+@end itemize
+
+Version 2.15 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{ARGIND} variable, that tracks the movement of @code{FILENAME}
+through @code{ARGV} (@pxref{Built-in Variables}).
+
+@item
+The @code{ERRNO} variable, that contains the system error message when
+@code{getline} returns @minus{}1, or when @code{close} fails
+(@pxref{Built-in Variables}).
+
+@item
+The ability to use GNU-style long named options that start with @samp{--}
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{--source} option for mixing command line and library
+file source code
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and
+@file{/dev/user} file name interpretation
+(@pxref{Special Files, ,Special File Names in @code{gawk}}).
+@end itemize
+
+Version 3.0 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{next file} statement became @code{nextfile}
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+
+@item
+The @samp{--lint-old} option to
+warn about constructs that are not available in
+the original Version 7 Unix version of @code{awk}
+(@pxref{V7/SVR3.1, , Major Changes between V7 and SVR3.1}).
+
+@item
+The @samp{--traditional} option was added as a better name for
+@samp{--compat} (@pxref{Options, ,Command Line Options}).
+
+@item
+The ability for @code{FS} to be a null string, and for the third
+argument to @code{split} to be the null string
+(@pxref{Single Character Fields, , Making Each Character a Separate Field}).
+
+@item
+The ability for @code{RS} to be a regexp
+(@pxref{Records, , How Input is Split into Records}).
+
+@item
+The @code{RT} variable
+(@pxref{Records, , How Input is Split into Records}).
+
+@item
+The @code{gensub} function for more powerful text manipulation
+(@pxref{String Functions, , Built-in Functions for String Manipulation}).
+
+@item
+The @code{strftime} function acquired a default time format,
+allowing it to be called with no arguments
+(@pxref{Time Functions, , Functions for Dealing with Time Stamps}).
+
+@item
+Full support for both POSIX and GNU regexps
+(@pxref{Regexp, , Regular Expressions}).
+
+@item
+The @samp{--re-interval} option to provide interval expressions in regexps
+(@pxref{Regexp Operators, , Regular Expression Operators}).
+
+@item
+@code{IGNORECASE} changed, now applying to string comparison as well
+as regexp operations
+(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).
+
+@item
+The @samp{-m} option and the @code{fflush} function from the
+Bell Labs research version of @code{awk}
+(@pxref{Options, ,Command Line Options}; also
+@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+
+@item
+The use of GNU Autoconf to control the configuration process
+(@pxref{Quick Installation, , Compiling @code{gawk} for Unix}).
+
+@item
+Amiga support
+(@pxref{Amiga Installation, ,Installing @code{gawk} on an Amiga}).
+
+@c XXX ADD MORE STUFF HERE
+
+@end itemize
+
+@node Gawk Summary, Installation, Language History, Top
+@appendix @code{gawk} Summary
+
+This appendix provides a brief summary of the @code{gawk} command line and the
+@code{awk} language. It is designed to serve as a ``quick reference.'' It is
+therefore terse, but complete.
+
+@menu
+* Command Line Summary:: Recapitulation of the command line.
+* Language Summary:: A terse review of the language.
+* Variables/Fields:: Variables, fields, and arrays.
+* Rules Summary:: Patterns and Actions, and their component
+ parts.
+* Actions Summary:: Quick overview of actions.
+* Functions Summary:: Defining and calling functions.
+* Historical Features:: Some undocumented but supported ``features''.
+@end menu
+
+@node Command Line Summary, Language Summary, Gawk Summary, Gawk Summary
+@appendixsec Command Line Options Summary
+
+The command line consists of options to @code{gawk} itself, the
+@code{awk} program text (if not supplied via the @samp{-f} option), and
+values to be made available in the @code{ARGC} and @code{ARGV}
+predefined @code{awk} variables:
+
+@example
+gawk @r{[@var{POSIX or GNU style options}]} -f @var{source-file} @r{[@code{--}]} @var{file} @dots{}
+gawk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+The options that @code{gawk} accepts are:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator @var{fs}
+Use @var{fs} for the input field separator (the value of the @code{FS}
+predefined variable).
+
+@item -f @var{program-file}
+@itemx --file @var{program-file}
+Read the @code{awk} program source from the file @var{program-file}, instead
+of from the first command line argument.
+
+@item -mf @var{NNN}
+@itemx -mr @var{NNN}
+The @samp{f} flag sets
+the maximum number of fields, and the @samp{r} flag sets the maximum
+record size. These options are ignored by @code{gawk}, since @code{gawk}
+has no predefined limits; they are only for compatibility with the
+Bell Labs research version of Unix @code{awk}.
+
+@item -v @var{var}=@var{val}
+@itemx --assign @var{var}=@var{val}
+Assign the variable @var{var} the value @var{val} before program execution
+begins.
+
+@item -W traditional
+@itemx -W compat
+@itemx --traditional
+@itemx --compat
+Use compatibility mode, in which @code{gawk} extensions are turned
+off.
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+Print the short version of the General Public License on the standard
+output, and exit. This option may disappear in a future version of @code{gawk}.
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+Print a relatively short summary of the available options on the standard
+output, and exit.
+
+@item -W lint
+@itemx --lint
+Give warnings about dubious or non-portable @code{awk} constructs.
+
+@item -W lint-old
+@itemx --lint-old
+Warn about constructs that are not available in
+the original Version 7 Unix version of @code{awk}.
+
+@item -W posix
+@itemx --posix
+Use POSIX compatibility mode, in which @code{gawk} extensions
+are turned off and additional restrictions apply.
+
+@item -W re-interval
+@itemx --re-interval
+Allow interval expressions
+(@pxref{Regexp Operators, , Regular Expression Operators}),
+in regexps.
+
+@item -W source=@var{program-text}
+@itemx --source @var{program-text}
+Use @var{program-text} as @code{awk} program source code. This option allows
+mixing command line source code with source code from files, and is
+particularly useful for mixing command line programs with library functions.
+
+@item -W version
+@itemx --version
+Print version information for this particular copy of @code{gawk} on the error
+output.
+
+@item --
+Signal the end of options. This is useful to allow further arguments to the
+@code{awk} program itself to start with a @samp{-}. This is mainly for
+consistency with POSIX argument parsing conventions.
+@end table
+
+Any other options are flagged as invalid, but are otherwise ignored.
+@xref{Options, ,Command Line Options}, for more details.
+
+@node Language Summary, Variables/Fields, Command Line Summary, Gawk Summary
+@appendixsec Language Summary
+
+An @code{awk} program consists of a sequence of zero or more pattern-action
+statements and optional function definitions. One or the other of the
+pattern and action may be omitted.
+
+@example
+@var{pattern} @{ @var{action statements} @}
+@var{pattern}
+ @{ @var{action statements} @}
+
+function @var{name}(@var{parameter list}) @{ @var{action statements} @}
+@end example
+
+@code{gawk} first reads the program source from the
+@var{program-file}(s), if specified, or from the first non-option
+argument on the command line. The @samp{-f} option may be used multiple
+times on the command line. @code{gawk} reads the program text from all
+the @var{program-file} files, effectively concatenating them in the
+order they are specified. This is useful for building libraries of
+@code{awk} functions, without having to include them in each new
+@code{awk} program that uses them. To use a library function in a file
+from a program typed in on the command line, specify
+@samp{--source '@var{program}'}, and type your program in between the single
+quotes.
+@xref{Options, ,Command Line Options}.
+
+The environment variable @code{AWKPATH} specifies a search path to use
+when finding source files named with the @samp{-f} option. The default
+path, which is
+@samp{.:/usr/local/share/awk}@footnote{The path may use a directory
+other than @file{/usr/local/share/awk}, depending upon how @code{gawk}
+was built and installed.} is used if @code{AWKPATH} is not set.
+If a file name given to the @samp{-f} option contains a @samp{/} character,
+no path search is performed.
+@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+
+@code{gawk} compiles the program into an internal form, and then proceeds to
+read each file named in the @code{ARGV} array.
+The initial values of @code{ARGV} come from the command line arguments.
+If there are no files named
+on the command line, @code{gawk} reads the standard input.
+
+If a ``file'' named on the command line has the form
+@samp{@var{var}=@var{val}}, it is treated as a variable assignment: the
+variable @var{var} is assigned the value @var{val}.
+If any of the files have a value that is the null string, that
+element in the list is skipped.
+
+For each record in the input, @code{gawk} tests to see if it matches any
+@var{pattern} in the @code{awk} program. For each pattern that the record
+matches, the associated @var{action} is executed.
+
+@node Variables/Fields, Rules Summary, Language Summary, Gawk Summary
+@appendixsec Variables and Fields
+
+@code{awk} variables are not declared; they come into existence when they are
+first used. Their values are either floating-point numbers or strings.
+@code{awk} also has one-dimensional arrays; multiple-dimensional arrays
+may be simulated. There are several predefined variables that
+@code{awk} sets as a program runs; these are summarized below.
+
+@menu
+* Fields Summary:: Input field splitting.
+* Built-in Summary:: @code{awk}'s built-in variables.
+* Arrays Summary:: Using arrays.
+* Data Type Summary:: Values in @code{awk} are numbers or strings.
+@end menu
+
+@node Fields Summary, Built-in Summary, Variables/Fields, Variables/Fields
+@appendixsubsec Fields
+
+As each input line is read, @code{gawk} splits the line into
+@var{fields}, using the value of the @code{FS} variable as the field
+separator. If @code{FS} is a single character, fields are separated by
+that character. Otherwise, @code{FS} is expected to be a full regular
+expression. In the special case that @code{FS} is a single space,
+fields are separated by runs of spaces, tabs and/or newlines.@footnote{In
+POSIX @code{awk}, newline does not separate fields.}
+If @code{FS} is the null string (@code{""}), then each individual
+character in the record becomes a separate field.
+Note that the value
+of @code{IGNORECASE} (@pxref{Case-sensitivity, ,Case-sensitivity in Matching})
+also affects how fields are split when @code{FS} is a regular expression.
+
+Each field in the input line may be referenced by its position, @code{$1},
+@code{$2}, and so on. @code{$0} is the whole line. The value of a field may
+be assigned to as well. Field numbers need not be constants:
+
+@example
+n = 5
+print $n
+@end example
+
+@noindent
+prints the fifth field in the input line. The variable @code{NF} is set to
+the total number of fields in the input line.
+
+References to non-existent fields (i.e.@: fields after @code{$NF}) return
+the null string. However, assigning to a non-existent field (e.g.,
+@code{$(NF+2) = 5}) increases the value of @code{NF}, creates any
+intervening fields with the null string as their value, and causes the
+value of @code{$0} to be recomputed, with the fields being separated by
+the value of @code{OFS}.
+Decrementing @code{NF} causes the values of fields past the new value to
+be lost, and the value of @code{$0} to be recomputed, with the fields being
+separated by the value of @code{OFS}.
+@xref{Reading Files, ,Reading Input Files}.
+
+@node Built-in Summary, Arrays Summary, Fields Summary, Variables/Fields
+@appendixsubsec Built-in Variables
+
+@code{gawk}'s built-in variables are:
+
+@table @code
+@item ARGC
+The number of elements in @code{ARGV}. See below for what is actually
+included in @code{ARGV}.
+
+@item ARGIND
+The index in @code{ARGV} of the current file being processed.
+When @code{gawk} is processing the input data files,
+it is always true that @samp{FILENAME == ARGV[ARGIND]}.
+
+@item ARGV
+The array of command line arguments. The array is indexed from zero to
+@code{ARGC} @minus{} 1. Dynamically changing @code{ARGC} and
+the contents of @code{ARGV}
+can control the files used for data. A null-valued element in
+@code{ARGV} is ignored. @code{ARGV} does not include the options to
+@code{awk} or the text of the @code{awk} program itself.
+
+@item CONVFMT
+The conversion format to use when converting numbers to strings.
+
+@item FIELDWIDTHS
+A space-separated list of numbers describing the fixed-width input data.
+
+@item ENVIRON
+An array of environment variable values. The array
+is indexed by variable name, each element being the value of that
+variable. Thus, the environment variable @code{HOME} is
+@code{ENVIRON["HOME"]}. One possible value might be @file{/home/arnold}.
+
+Changing this array does not affect the environment seen by programs
+which @code{gawk} spawns via redirection or the @code{system} function.
+(This may change in a future version of @code{gawk}.)
+
+Some operating systems do not have environment variables.
+The @code{ENVIRON} array is empty when running on these systems.
+
+@item ERRNO
+The system error message when an error occurs using @code{getline}
+or @code{close}.
+
+@item FILENAME
+The name of the current input file. If no files are specified on the command
+line, the value of @code{FILENAME} is the null string.
+
+@item FNR
+The input record number in the current input file.
+
+@item FS
+The input field separator, a space by default.
+
+@item IGNORECASE
+The case-sensitivity flag for string comparisons and regular expression
+operations. If @code{IGNORECASE} has a non-zero value, then pattern
+matching in rules, record separating with @code{RS}, field splitting
+with @code{FS}, regular expression matching with @samp{~} and
+@samp{!~}, and the @code{gensub}, @code{gsub}, @code{index},
+@code{match}, @code{split} and @code{sub} built-in functions all
+ignore case when doing regular expression operations, and all string
+comparisons are done ignoring case.
+The value of @code{IGNORECASE} does @emph{not} affect array subscripting.
+
+@item NF
+The number of fields in the current input record.
+
+@item NR
+The total number of input records seen so far.
+
+@item OFMT
+The output format for numbers for the @code{print} statement,
+@code{"%.6g"} by default.
+
+@item OFS
+The output field separator, a space by default.
+
+@item ORS
+The output record separator, by default a newline.
+
+@item RS
+The input record separator, by default a newline.
+If @code{RS} is set to the null string, then records are separated by
+blank lines. When @code{RS} is set to the null string, then the newline
+character always acts as a field separator, in addition to whatever value
+@code{FS} may have. If @code{RS} is set to a multi-character
+string, it denotes a regexp; input text matching the regexp
+separates records.
+
+@item RT
+The input text that matched the text denoted by @code{RS},
+the record separator.
+
+@item RSTART
+The index of the first character last matched by @code{match}; zero if no match.
+
+@item RLENGTH
+The length of the string last matched by @code{match}; @minus{}1 if no match.
+
+@item SUBSEP
+The string used to separate multiple subscripts in array elements, by
+default @code{"\034"}.
+@end table
+
+@xref{Built-in Variables}, for more information.
+
+@node Arrays Summary, Data Type Summary, Built-in Summary, Variables/Fields
+@appendixsubsec Arrays
+
+Arrays are subscripted with an expression between square brackets
+(@samp{[} and @samp{]}). Array subscripts are @emph{always} strings;
+numbers are converted to strings as necessary, following the standard
+conversion rules
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+If you use multiple expressions separated by commas inside the square
+brackets, then the array subscript is a string consisting of the
+concatenation of the individual subscript values, converted to strings,
+separated by the subscript separator (the value of @code{SUBSEP}).
+
+The special operator @code{in} may be used in a conditional context
+to see if an array has an index consisting of a particular value.
+
+@example
+if (val in array)
+ print array[val]
+@end example
+
+If the array has multiple subscripts, use @samp{(i, j, @dots{}) in @var{array}}
+to test for existence of an element.
+
+The @code{in} construct may also be used in a @code{for} loop to iterate
+over all the elements of an array.
+@xref{Scanning an Array, ,Scanning All Elements of an Array}.
+
+You can remove an element from an array using the @code{delete} statement.
+
+You can clear an entire array using @samp{delete @var{array}}.
+
+@xref{Arrays, ,Arrays in @code{awk}}.
+
+@node Data Type Summary, , Arrays Summary, Variables/Fields
+@appendixsubsec Data Types
+
+The value of an @code{awk} expression is always either a number
+or a string.
+
+Some contexts (such as arithmetic operators) require numeric
+values. They convert strings to numbers by interpreting the text
+of the string as a number. If the string does not look like a
+number, it converts to zero.
+
+Other contexts (such as concatenation) require string values.
+They convert numbers to strings by effectively printing them
+with @code{sprintf}.
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the details.
+
+To force conversion of a string value to a number, simply add zero
+to it. If the value you start with is already a number, this
+does not change it.
+
+To force conversion of a numeric value to a string, concatenate it with
+the null string.
+
+Comparisons are done numerically if both operands are numeric, or if
+one is numeric and the other is a numeric string. Otherwise one or
+both operands are converted to strings and a string comparison is
+performed. Fields, @code{getline} input, @code{FILENAME}, @code{ARGV}
+elements, @code{ENVIRON} elements and the elements of an array created
+by @code{split} are the only items that can be numeric strings. String
+constants, such as @code{"3.1415927"}, are not numeric strings; they are
+string constants. The full rules for comparisons are described in
+@ref{Typing and Comparison, ,Variable Typing and Comparison Expressions}.
+
+Uninitialized variables have the string value @code{""} (the null, or
+empty, string). In contexts where a number is required, this is
+equivalent to zero.
+
+@xref{Variables}, for more information on variable naming and initialization;
+@pxref{Conversion, ,Conversion of Strings and Numbers}, for more information
+on how variable values are interpreted.
+
+@node Rules Summary, Actions Summary, Variables/Fields, Gawk Summary
+@appendixsec Patterns
+
+@menu
+* Pattern Summary:: Quick overview of patterns.
+* Regexp Summary:: Quick overview of regular expressions.
+@end menu
+
+An @code{awk} program is mostly composed of rules, each consisting of a
+pattern followed by an action. The action is enclosed in @samp{@{} and
+@samp{@}}. Either the pattern may be missing, or the action may be
+missing, but not both. If the pattern is missing, the
+action is executed for every input record. A missing action is
+equivalent to @samp{@w{@{ print @}}}, which prints the entire line.
+
+@c These paragraphs repeated for both patterns and actions. I don't
+@c like this, but I also don't see any way around it. Update both copies
+@c if they need fixing.
+Comments begin with the @samp{#} character, and continue until the end of the
+line. Blank lines may be used to separate statements. Statements normally
+end with a newline; however, this is not the case for lines ending in a
+@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines
+ending in @code{do} or @code{else} also have their statements automatically
+continued on the following line. In other cases, a line can be continued by
+ending it with a @samp{\}, in which case the newline is ignored.
+
+Multiple statements may be put on one line by separating each one with
+a @samp{;}.
+This applies to both the statements within the action part of a rule (the
+usual case), and to the rule statements.
+
+@xref{Comments, ,Comments in @code{awk} Programs}, for information on
+@code{awk}'s commenting convention;
+@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a
+description of the line continuation mechanism in @code{awk}.
+
+@node Pattern Summary, Regexp Summary, Rules Summary, Rules Summary
+@appendixsubsec Pattern Summary
+
+@code{awk} patterns may be one of the following:
+
+@example
+/@var{regular expression}/
+@var{relational expression}
+@var{pattern} && @var{pattern}
+@var{pattern} || @var{pattern}
+@var{pattern} ? @var{pattern} : @var{pattern}
+(@var{pattern})
+! @var{pattern}
+@var{pattern1}, @var{pattern2}
+BEGIN
+END
+@end example
+
+@code{BEGIN} and @code{END} are two special kinds of patterns that are not
+tested against the input. The action parts of all @code{BEGIN} rules are
+concatenated as if all the statements had been written in a single @code{BEGIN}
+rule. They are executed before any of the input is read. Similarly, all the
+@code{END} rules are concatenated, and executed when all the input is exhausted (or
+when an @code{exit} statement is executed). @code{BEGIN} and @code{END}
+patterns cannot be combined with other patterns in pattern expressions.
+@code{BEGIN} and @code{END} rules cannot have missing action parts.
+
+For @code{/@var{regular expression}/} patterns, the associated statement is
+executed for each input record that matches the regular expression. Regular
+expressions are summarized below.
+
+A @var{relational expression} may use any of the operators defined below in
+the section on actions. These generally test whether certain fields match
+certain regular expressions.
+
+The @samp{&&}, @samp{||}, and @samp{!} operators are logical ``and,''
+logical ``or,'' and logical ``not,'' respectively, as in C. They do
+short-circuit evaluation, also as in C, and are used for combining more
+primitive pattern expressions. As in most languages, parentheses may be
+used to change the order of evaluation.
+
+The @samp{?:} operator is like the same operator in C. If the first
+pattern matches, then the second pattern is matched against the input
+record; otherwise, the third is matched. Only one of the second and
+third patterns is matched.
+
+The @samp{@var{pattern1}, @var{pattern2}} form of a pattern is called a
+range pattern. It matches all input lines starting with a line that
+matches @var{pattern1}, and continuing until a line that matches
+@var{pattern2}, inclusive. A range pattern cannot be used as an operand
+of any of the pattern operators.
+
+@xref{Pattern Overview, ,Pattern Elements}.
+
+@node Regexp Summary, , Pattern Summary, Rules Summary
+@appendixsubsec Regular Expressions
+
+Regular expressions are based on POSIX EREs (extended regular expressions).
+The escape sequences allowed in string constants are also valid in
+regular expressions (@pxref{Escape Sequences}).
+Regexps are composed of characters as follows:
+
+@table @code
+@item @var{c}
+matches the character @var{c} (assuming @var{c} is none of the characters
+listed below).
+
+@item \@var{c}
+matches the literal character @var{c}.
+
+@item .
+matches any character, @emph{including} newline.
+In strict POSIX mode, @samp{.} does not match the @sc{nul}
+character, which is a character with all bits equal to zero.
+
+@item ^
+matches the beginning of a string.
+
+@item $
+matches the end of a string.
+
+@item [@var{abc}@dots{}]
+matches any of the characters @var{abc}@dots{} (character list).
+
+@item [[:@var{class}:]]
+matches any character in the character class @var{class}. Allowable classes
+are @code{alnum}, @code{alpha}, @code{blank}, @code{cntrl},
+@code{digit}, @code{graph}, @code{lower}, @code{print}, @code{punct},
+@code{space}, @code{upper}, and @code{xdigit}.
+
+@item [[.@var{symbol}.]]
+matches the multi-character collating symbol @var{symbol}.
+@code{gawk} does not currently support collating symbols.
+
+@item [[=@var{classname}=]]
+matches any of the equivalent characters in the current locale named by the
+equivalence class @var{classname}.
+@code{gawk} does not currently support equivalence classes.
+
+@item [^@var{abc}@dots{}]
+matches any character except @var{abc}@dots{} (negated
+character list).
+
+@item @var{r1}|@var{r2}
+matches either @var{r1} or @var{r2} (alternation).
+
+@item @var{r1r2}
+matches @var{r1}, and then @var{r2} (concatenation).
+
+@item @var{r}+
+matches one or more @var{r}'s.
+
+@item @var{r}*
+matches zero or more @var{r}'s.
+
+@item @var{r}?
+matches zero or one @var{r}'s.
+
+@item (@var{r})
+matches @var{r} (grouping).
+
+@item @var{r}@{@var{n}@}
+@itemx @var{r}@{@var{n},@}
+@itemx @var{r}@{@var{n},@var{m}@}
+matches exactly @var{n}, at least @var{n}, or @var{n} to @var{m}
+occurrences of @var{r} (interval expressions).
+
+@item \y
+matches the empty string at either the beginning or the
+end of a word.
+
+@item \B
+matches the empty string within a word.
+
+@item \<
+matches the empty string at the beginning of a word.
+
+@item \>
+matches the empty string at the end of a word.
+
+@item \w
+matches any word-constituent character (alphanumeric characters and
+the underscore).
+
+@item \W
+matches any character that is not word-constituent.
+
+@item \`
+matches the empty string at the beginning of a buffer (same as a string
+in @code{gawk}).
+
+@item \'
+matches the empty string at the end of a buffer.
+@end table
+
+The various command line options
+control how @code{gawk} interprets characters in regexps.
+
+@c NOTE!!! Keep this in sync with the same table in the regexp chapter!
+@table @asis
+@item No options
+In the default case, @code{gawk} provides all the facilities of
+POSIX regexps and the GNU regexp operators described above.
+However, interval expressions are not supported.
+
+@item @code{--posix}
+Only POSIX regexps are supported; the GNU operators are not special
+(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions
+are allowed.
+
+@item @code{--traditional}
+Traditional Unix @code{awk} regexps are matched. The GNU operators
+are not special, interval expressions are not available, and neither
+are the POSIX character classes (@code{[[:alnum:]]} and so on).
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+
+@item @code{--re-interval}
+Allow interval expressions in regexps, even if @samp{--traditional}
+has been provided.
+@end table
+
+@xref{Regexp, ,Regular Expressions}.
+
+@node Actions Summary, Functions Summary, Rules Summary, Gawk Summary
+@appendixsec Actions
+
+Action statements are enclosed in braces, @samp{@{} and @samp{@}}.
+A missing action statement is equivalent to @samp{@w{@{ print @}}}.
+
+Action statements consist of the usual assignment, conditional, and looping
+statements found in most languages. The operators, control statements,
+and Input/Output statements available are similar to those in C.
+
+@c These paragraphs repeated for both patterns and actions. I don't
+@c like this, but I also don't see any way around it. Update both copies
+@c if they need fixing.
+Comments begin with the @samp{#} character, and continue until the end of the
+line. Blank lines may be used to separate statements. Statements normally
+end with a newline; however, this is not the case for lines ending in a
+@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines
+ending in @code{do} or @code{else} also have their statements automatically
+continued on the following line. In other cases, a line can be continued by
+ending it with a @samp{\}, in which case the newline is ignored.
+
+Multiple statements may be put on one line by separating each one with
+a @samp{;}.
+This applies to both the statements within the action part of a rule (the
+usual case), and to the rule statements.
+
+@xref{Comments, ,Comments in @code{awk} Programs}, for information on
+@code{awk}'s commenting convention;
+@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a
+description of the line continuation mechanism in @code{awk}.
+
+@menu
+* Operator Summary:: @code{awk} operators.
+* Control Flow Summary:: The control statements.
+* I/O Summary:: The I/O statements.
+* Printf Summary:: A summary of @code{printf}.
+* Special File Summary:: Special file names interpreted internally.
+* Built-in Functions Summary:: Built-in numeric and string functions.
+* Time Functions Summary:: Built-in time functions.
+* String Constants Summary:: Escape sequences in strings.
+@end menu
+
+@node Operator Summary, Control Flow Summary, Actions Summary, Actions Summary
+@appendixsubsec Operators
+
+The operators in @code{awk}, in order of decreasing precedence, are:
+
+@table @code
+@item (@dots{})
+Grouping.
+
+@item $
+Field reference.
+
+@item ++ --
+Increment and decrement, both prefix and postfix.
+
+@item ^
+Exponentiation (@samp{**} may also be used, and @samp{**=} for the assignment
+operator, but they are not specified in the POSIX standard).
+
+@item + - !
+Unary plus, unary minus, and logical negation.
+
+@item * / %
+Multiplication, division, and modulus.
+
+@item + -
+Addition and subtraction.
+
+@item @var{space}
+String concatenation.
+
+@item < <= > >= != ==
+The usual relational operators.
+
+@item ~ !~
+Regular expression match, negated match.
+
+@item in
+Array membership.
+
+@item &&
+Logical ``and''.
+
+@item ||
+Logical ``or''.
+
+@item ?:
+A conditional expression. This has the form @samp{@var{expr1} ?
+@var{expr2} : @var{expr3}}. If @var{expr1} is true, the value of the
+expression is @var{expr2}; otherwise it is @var{expr3}. Only one of
+@var{expr2} and @var{expr3} is evaluated.
+
+@item = += -= *= /= %= ^=
+Assignment. Both absolute assignment (@code{@var{var}=@var{value}})
+and operator assignment (the other forms) are supported.
+@end table
+
+@xref{Expressions}.
+
+@node Control Flow Summary, I/O Summary, Operator Summary, Actions Summary
+@appendixsubsec Control Statements
+
+The control statements are as follows:
+
+@example
+if (@var{condition}) @var{statement} @r{[} else @var{statement} @r{]}
+while (@var{condition}) @var{statement}
+do @var{statement} while (@var{condition})
+for (@var{expr1}; @var{expr2}; @var{expr3}) @var{statement}
+for (@var{var} in @var{array}) @var{statement}
+break
+continue
+delete @var{array}[@var{index}]
+delete @var{array}
+exit @r{[} @var{expression} @r{]}
+@{ @var{statements} @}
+@end example
+
+@xref{Statements, ,Control Statements in Actions}.
+
+@node I/O Summary, Printf Summary, Control Flow Summary, Actions Summary
+@appendixsubsec I/O Statements
+
+The Input/Output statements are as follows:
+
+@table @code
+@item getline
+Set @code{$0} from next input record; set @code{NF}, @code{NR}, @code{FNR}.
+@xref{Getline, ,Explicit Input with @code{getline}}.
+
+@item getline <@var{file}
+Set @code{$0} from next record of @var{file}; set @code{NF}.
+
+@item getline @var{var}
+Set @var{var} from next input record; set @code{NR}, @code{FNR}.
+
+@item getline @var{var} <@var{file}
+Set @var{var} from next record of @var{file}.
+
+@item @var{command} | getline
+Run @var{command}, piping its output into @code{getline}; sets @code{$0},
+@code{NF}, @code{NR}.
+
+@item @var{command} | getline @var{var}
+Run @var{command}, piping its output into @code{getline}; sets @var{var}.
+
+@item next
+Stop processing the current input record. The next input record is read and
+processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+@xref{Next Statement, ,The @code{next} Statement}.
+
+@item nextfile
+Stop processing the current input file. The next input record read comes
+from the next input file. @code{FILENAME} is updated, @code{FNR} is set to one,
+@code{ARGIND} is incremented,
+and processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+Earlier versions of @code{gawk} used @samp{next file}; this usage is still
+supported, but is considered to be deprecated.
+@xref{Nextfile Statement, ,The @code{nextfile} Statement}.
+
+@item print
+Prints the current record.
+@xref{Printing, ,Printing Output}.
+
+@item print @var{expr-list}
+Prints expressions.
+
+@item print @var{expr-list} > @var{file}
+Prints expressions to @var{file}. If @var{file} does not exist, it is
+created. If it does exist, its contents are deleted the first time the
+@code{print} is executed.
+
+@item print @var{expr-list} >> @var{file}
+Prints expressions to @var{file}. The previous contents of @var{file}
+are retained, and the output of @code{print} is appended to the file.
+
+@item print @var{expr-list} | @var{command}
+Prints expressions, sending the output down a pipe to @var{command}.
+The pipeline to the command stays open until the @code{close} function
+is called.
+
+@item printf @var{fmt, expr-list}
+Format and print.
+
+@item printf @var{fmt, expr-list} > @var{file}
+Format and print to @var{file}. If @var{file} does not exist, it is
+created. If it does exist, its contents are deleted the first time the
+@code{printf} is executed.
+
+@item printf @var{fmt, expr-list} >> @var{file}
+Format and print to @var{file}. The previous contents of @var{file}
+are retained, and the output of @code{printf} is appended to the file.
+
+@item printf @var{fmt, expr-list} | @var{command}
+Format and print, sending the output down a pipe to @var{command}.
+The pipeline to the command stays open until the @code{close} function
+is called.
+@end table
+
+@code{getline} returns zero on end of file, and @minus{}1 on an error.
+In the event of an error, @code{getline} will set @code{ERRNO} to
+the value of a system-dependent string that describes the error.
+
+@node Printf Summary, Special File Summary, I/O Summary, Actions Summary
+@appendixsubsec @code{printf} Summary
+
+Conversion specifications have the form
+@code{%}[@var{flag}][@var{width}][@code{.}@var{prec}]@var{format}.
+@c whew!
+Items in brackets are optional.
+
+The @code{awk} @code{printf} statement and @code{sprintf} function
+accept the following conversion specification formats:
+
+@table @code
+@item %c
+An ASCII character. If the argument used for @samp{%c} is numeric, it is
+treated as a character and printed. Otherwise, the argument is assumed to
+be a string, and only the first character of that string is printed.
+
+@item %d
+@itemx %i
+A decimal number (the integer part).
+
+@item %e
+@itemx %E
+A floating point number of the form
+@samp{@r{[}-@r{]}d.dddddde@r{[}+-@r{]}dd}.
+The @samp{%E} format uses @samp{E} instead of @samp{e}.
+
+@item %f
+A floating point number of the form
+@r{[}@code{-}@r{]}@code{ddd.dddddd}.
+
+@item %g
+@itemx %G
+Use either the @samp{%e} or @samp{%f} formats, whichever produces a shorter
+string, with non-significant zeros suppressed.
+@samp{%G} will use @samp{%E} instead of @samp{%e}.
+
+@item %o
+An unsigned octal number (again, an integer).
+
+@item %s
+A character string.
+
+@item %x
+@itemx %X
+An unsigned hexadecimal number (an integer).
+The @samp{%X} format uses @samp{A} through @samp{F} instead of
+@samp{a} through @samp{f} for decimal 10 through 15.
+
+@item %%
+A single @samp{%} character; no argument is converted.
+@end table
+
+There are optional, additional parameters that may lie between the @samp{%}
+and the control letter:
+
+@table @code
+@item -
+The expression should be left-justified within its field.
+
+@item @var{space}
+For numeric conversions, prefix positive values with a space, and
+negative values with a minus sign.
+
+@item +
+The plus sign, used before the width modifier (see below),
+says to always supply a sign for numeric conversions, even if the data
+to be formatted is positive. The @samp{+} overrides the space modifier.
+
+@item #
+Use an ``alternate form'' for certain control letters.
+For @samp{o}, supply a leading zero.
+For @samp{x}, and @samp{X}, supply a leading @samp{0x} or @samp{0X} for
+a non-zero result.
+For @samp{e}, @samp{E}, and @samp{f}, the result will always contain a
+decimal point.
+For @samp{g}, and @samp{G}, trailing zeros are not removed from the result.
+
+@item 0
+A leading @samp{0} (zero) acts as a flag that indicates output should be
+padded with zeros instead of spaces.
+This applies even to non-numeric output formats.
+This flag only has an effect when the field width is wider than the
+value to be printed.
+
+@item @var{width}
+The field should be padded to this width. The field is normally padded
+with spaces. If the @samp{0} flag has been used, it is padded with zeros.
+
+@item .@var{prec}
+A number that specifies the precision to use when printing.
+For the @samp{e}, @samp{E}, and @samp{f} formats, this specifies the
+number of digits you want printed to the right of the decimal point.
+For the @samp{g}, and @samp{G} formats, it specifies the maximum number
+of significant digits. For the @samp{d}, @samp{o}, @samp{i}, @samp{u},
+@samp{x}, and @samp{X} formats, it specifies the minimum number of
+digits to print. For the @samp{s} format, it specifies the maximum number of
+characters from the string that should be printed.
+@end table
+
+Either or both of the @var{width} and @var{prec} values may be specified
+as @samp{*}. In that case, the particular value is taken from the argument
+list.
+
+@xref{Printf, ,Using @code{printf} Statements for Fancier Printing}.
+
+@node Special File Summary, Built-in Functions Summary, Printf Summary, Actions Summary
+@appendixsubsec Special File Names
+
+When doing I/O redirection from either @code{print} or @code{printf} into a
+file, or via @code{getline} from a file, @code{gawk} recognizes certain special
+file names internally. These file names allow access to open file descriptors
+inherited from @code{gawk}'s parent process (usually the shell). The
+file names are:
+
+@table @file
+@item /dev/stdin
+The standard input.
+
+@item /dev/stdout
+The standard output.
+
+@item /dev/stderr
+The standard error output.
+
+@item /dev/fd/@var{n}
+The file denoted by the open file descriptor @var{n}.
+@end table
+
+In addition, reading the following files provides process related information
+about the running @code{gawk} program. All returned records are terminated
+with a newline.
+
+@table @file
+@item /dev/pid
+Returns the process ID of the current process.
+
+@item /dev/ppid
+Returns the parent process ID of the current process.
+
+@item /dev/pgrpid
+Returns the process group ID of the current process.
+
+@item /dev/user
+At least four space-separated fields, containing the return values of
+the @code{getuid}, @code{geteuid}, @code{getgid}, and @code{getegid}
+system calls.
+If there are any additional fields, they are the group IDs returned by
+the @code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)
+@end table
+
+@noindent
+These file names may also be used on the command line to name data files.
+These file names are only recognized internally if you do not
+actually have files with these names on your system.
+
+@xref{Special Files, ,Special File Names in @code{gawk}}, for a longer description that
+provides the motivation for this feature.
+
+@node Built-in Functions Summary, Time Functions Summary, Special File Summary, Actions Summary
+@appendixsubsec Built-in Functions
+
+@code{awk} provides a number of built-in functions for performing
+numeric operations, string related operations, and I/O related operations.
+
+The built-in arithmetic functions are:
+
+@table @code
+@item atan2(@var{y}, @var{x})
+the arctangent of @var{y/x} in radians.
+
+@item cos(@var{expr})
+the cosine of @var{expr}, which is in radians.
+
+@item exp(@var{expr})
+the exponential function (@code{e ^ @var{expr}}).
+
+@item int(@var{expr})
+truncates to integer.
+
+@item log(@var{expr})
+the natural logarithm of @var{expr}.
+
+@item rand()
+a random number between zero and one.
+
+@item sin(@var{expr})
+the sine of @var{expr}, which is in radians.
+
+@item sqrt(@var{expr})
+the square root function.
+
+@item srand(@r{[}@var{expr}@r{]})
+use @var{expr} as a new seed for the random number generator. If no @var{expr}
+is provided, the time of day is used. The return value is the previous
+seed for the random number generator.
+@end table
+
+@code{awk} has the following built-in string functions:
+
+@table @code
+@item gensub(@var{regex}, @var{subst}, @var{how} @r{[}, @var{target}@r{]})
+If @var{how} is a string beginning with @samp{g} or @samp{G}, then
+replace each match of @var{regex} in @var{target} with @var{subst}.
+Otherwise, replace the @var{how}'th occurrence. If @var{target} is not
+supplied, use @code{$0}. The return value is the changed string; the
+original @var{target} is not modified. Within @var{subst},
+@samp{\@var{n}}, where @var{n} is a digit from one to nine, can be used to
+indicate the text that matched the @var{n}'th parenthesized
+subexpression.
+This function is @code{gawk}-specific.
+
+@item gsub(@var{regex}, @var{subst} @r{[}, @var{target}@r{]})
+for each substring matching the regular expression @var{regex} in the string
+@var{target}, substitute the string @var{subst}, and return the number of
+substitutions. If @var{target} is not supplied, use @code{$0}.
+
+@item index(@var{str}, @var{search})
+returns the index of the string @var{search} in the string @var{str}, or
+zero if
+@var{search} is not present.
+
+@item length(@r{[}@var{str}@r{]})
+returns the length of the string @var{str}. The length of @code{$0}
+is returned if no argument is supplied.
+
+@item match(@var{str}, @var{regex})
+returns the position in @var{str} where the regular expression @var{regex}
+occurs, or zero if @var{regex} is not present, and sets the values of
+@code{RSTART} and @code{RLENGTH}.
+
+@item split(@var{str}, @var{arr} @r{[}, @var{regex}@r{]})
+splits the string @var{str} into the array @var{arr} on the regular expression
+@var{regex}, and returns the number of elements. If @var{regex} is omitted,
+@code{FS} is used instead. @var{regex} can be the null string, causing
+each character to be placed into its own array element.
+The array @var{arr} is cleared first.
+
+@item sprintf(@var{fmt}, @var{expr-list})
+prints @var{expr-list} according to @var{fmt}, and returns the resulting string.
+
+@item sub(@var{regex}, @var{subst} @r{[}, @var{target}@r{]})
+just like @code{gsub}, but only the first matching substring is replaced.
+
+@item substr(@var{str}, @var{index} @r{[}, @var{len}@r{]})
+returns the @var{len}-character substring of @var{str} starting at @var{index}.
+If @var{len} is omitted, the rest of @var{str} is used.
+
+@item tolower(@var{str})
+returns a copy of the string @var{str}, with all the upper-case characters in
+@var{str} translated to their corresponding lower-case counterparts.
+Non-alphabetic characters are left unchanged.
+
+@item toupper(@var{str})
+returns a copy of the string @var{str}, with all the lower-case characters in
+@var{str} translated to their corresponding upper-case counterparts.
+Non-alphabetic characters are left unchanged.
+@end table
+
+The I/O related functions are:
+
+@table @code
+@item close(@var{expr})
+Close the open file or pipe denoted by @var{expr}.
+
+@item fflush(@r{[}@var{expr}@r{]})
+Flush any buffered output for the output file or pipe denoted by @var{expr}.
+If @var{expr} is omitted, standard output is flushed.
+If @var{expr} is the null string (@code{""}), all output buffers are flushed.
+
+@item system(@var{cmd-line})
+Execute the command @var{cmd-line}, and return the exit status.
+If your operating system does not support @code{system}, calling it will
+generate a fatal error.
+
+@samp{system("")} can be used to force @code{awk} to flush any pending
+output. This is more portable, but less obvious, than calling @code{fflush}.
+@end table
+
+@node Time Functions Summary, String Constants Summary, Built-in Functions Summary, Actions Summary
+@appendixsubsec Time Functions
+
+The following two functions are available for getting the current
+time of day, and for formatting time stamps.
+They are specific to @code{gawk}.
+
+@table @code
+@item systime()
+returns the current time of day as the number of seconds since a particular
+epoch (Midnight, January 1, 1970 UTC, on POSIX systems).
+
+@item strftime(@r{[}@var{format}@r{[}, @var{timestamp}@r{]]})
+formats @var{timestamp} according to the specification in @var{format}.
+The current time of day is used if no @var{timestamp} is supplied.
+A default format equivalent to the output of the @code{date} utility is used if
+no @var{format} is supplied.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for the
+details on the conversion specifiers that @code{strftime} accepts.
+@end table
+
+@iftex
+@xref{Built-in, ,Built-in Functions}, for a description of all of
+@code{awk}'s built-in functions.
+@end iftex
+
+@node String Constants Summary, , Time Functions Summary, Actions Summary
+@appendixsubsec String Constants
+
+String constants in @code{awk} are sequences of characters enclosed
+in double quotes (@code{"}). Within strings, certain @dfn{escape sequences}
+are recognized, as in C. These are:
+
+@table @code
+@item \\
+A literal backslash.
+
+@item \a
+The ``alert'' character; usually the ASCII BEL character.
+
+@item \b
+Backspace.
+
+@item \f
+Formfeed.
+
+@item \n
+Newline.
+
+@item \r
+Carriage return.
+
+@item \t
+Horizontal tab.
+
+@item \v
+Vertical tab.
+
+@item \x@var{hex digits}
+The character represented by the string of hexadecimal digits following
+the @samp{\x}. As in ANSI C, all following hexadecimal digits are
+considered part of the escape sequence. E.g., @code{"\x1B"} is a
+string containing the ASCII ESC (escape) character. (The @samp{\x}
+escape sequence is not in POSIX @code{awk}.)
+
+@item \@var{ddd}
+The character represented by the one, two, or three digit sequence of octal
+digits. Thus, @code{"\033"} is also a string containing the ASCII ESC
+(escape) character.
+
+@item \@var{c}
+The literal character @var{c}, if @var{c} is not one of the above.
+@end table
+
+The escape sequences may also be used inside constant regular expressions
+(e.g., the regexp @code{@w{/[@ \t\f\n\r\v]/}} matches whitespace
+characters).
+
+@xref{Escape Sequences}.
+
+@node Functions Summary, Historical Features, Actions Summary, Gawk Summary
+@appendixsec User-defined Functions
+
+Functions in @code{awk} are defined as follows:
+
+@example
+function @var{name}(@var{parameter list}) @{ @var{statements} @}
+@end example
+
+Actual parameters supplied in the function call are used to instantiate
+the formal parameters declared in the function. Arrays are passed by
+reference; other variables are passed by value.
+
+If there are fewer arguments passed than there are names in @var{parameter list},
+the extra names are given the null string as their value. Extra names have the
+effect of local variables.
+
+The open-parenthesis in a function call of a user-defined function must
+immediately follow the function name, without any intervening white space.
+This is to avoid a syntactic ambiguity with the concatenation operator.
+
+The word @code{func} may be used in place of @code{function} (but not in
+POSIX @code{awk}).
+
+Use the @code{return} statement to return a value from a function.
+
+@xref{User-defined, ,User-defined Functions}.
+
+@node Historical Features, , Functions Summary, Gawk Summary
+@appendixsec Historical Features
+
+@cindex historical features
+There are two features of historical @code{awk} implementations that
+@code{gawk} supports.
+
+First, it is possible to call the @code{length} built-in function not only
+with no arguments, but even without parentheses!
+
+@example
+a = length
+@end example
+
+@noindent
+is the same as either of
+
+@example
+a = length()
+a = length($0)
+@end example
+
+@noindent
+For example:
+
+@example
+$ echo abcdef | awk '@{ print length @}'
+@print{} 6
+@end example
+
+@noindent
+This feature is marked as ``deprecated'' in the POSIX standard, and
+@code{gawk} will issue a warning about its use if @samp{--lint} is
+specified on the command line.
+(The ability to use @code{length} this way was actually an accident of the
+original Unix @code{awk} implementation. If any built-in function used
+@code{$0} as its default argument, it was possible to call that function
+without the parentheses. In particular, it was common practice to use
+the @code{length} function in this fashion, and this usage was documented
+in the @code{awk} manual page.)
+
+The other historical feature is the use of either the @code{break} statement,
+or the @code{continue} statement
+outside the body of a @code{while}, @code{for}, or @code{do} loop. Traditional
+@code{awk} implementations have treated such usage as equivalent to the
+@code{next} statement. More recent versions of Unix @code{awk} do not allow
+it. @code{gawk} supports this usage if @samp{--traditional} has been
+specified.
+
+@xref{Options, ,Command Line Options}, for more information about the
+@samp{--traditional}, @samp{--posix}, and @samp{--lint} options.
+
+@node Installation, Notes, Gawk Summary, Top
+@appendix Installing @code{gawk}
+
+This appendix provides instructions for installing @code{gawk} on the
+various platforms that are supported by the developers. The primary
+developers support Unix (and one day, GNU), while the other ports were
+contributed. The file @file{ACKNOWLEDGMENT} in the @code{gawk}
+distribution lists the electronic mail addresses of the people who did
+the respective ports, and they are also provided in
+@ref{Bugs, , Reporting Problems and Bugs}.
+
+@menu
+* Gawk Distribution:: What is in the @code{gawk} distribution.
+* Unix Installation:: Installing @code{gawk} under various versions
+ of Unix.
+* VMS Installation:: Installing @code{gawk} on VMS.
+* PC Installation:: Installing and Compiling @code{gawk} on MS-DOS
+ and OS/2
+* Atari Installation:: Installing @code{gawk} on the Atari ST.
+* Amiga Installation:: Installing @code{gawk} on an Amiga.
+* Bugs:: Reporting Problems and Bugs.
+* Other Versions:: Other freely available @code{awk}
+ implementations.
+@end menu
+
+@node Gawk Distribution, Unix Installation, Installation, Installation
+@appendixsec The @code{gawk} Distribution
+
+This section first describes how to get the @code{gawk}
+distribution, how to extract it, and then what is in the various files and
+subdirectories.
+
+@menu
+* Getting:: How to get the distribution.
+* Extracting:: How to extract the distribution.
+* Distribution contents:: What is in the distribution.
+@end menu
+
+@node Getting, Extracting, Gawk Distribution, Gawk Distribution
+@appendixsubsec Getting the @code{gawk} Distribution
+@cindex getting @code{gawk}
+@cindex anonymous @code{ftp}
+@cindex @code{ftp}, anonymous
+@cindex Free Software Foundation
+There are three ways you can get GNU software.
+
+@enumerate
+@item
+You can copy it from someone else who already has it.
+
+@cindex Free Software Foundation
+@item
+You can order @code{gawk} directly from the Free Software Foundation.
+Software distributions are available for Unix, MS-DOS, and VMS, on
+tape and CD-ROM. The address is:
+
+@quotation
+Free Software Foundation @*
+59 Temple Place---Suite 330 @*
+Boston, MA 02111-1307 USA @*
+Phone: +1-617-542-5942 @*
+Fax (including Japan): +1-617-542-2652 @*
+E-mail: @code{gnu@@prep.ai.mit.edu} @*
+@end quotation
+
+@noindent
+Ordering from the FSF directly contributes to the support of the foundation
+and to the production of more free software.
+
+@item
+You can get @code{gawk} by using anonymous @code{ftp} to the Internet host
+@code{ftp.gnu.ai.mit.edu}, in the directory @file{/pub/gnu}.
+
+Here is a list of alternate @code{ftp} sites from which you can obtain GNU
+software. When a site is listed as ``@var{site}@code{:}@var{directory}'' the
+@var{directory} indicates the directory where GNU software is kept.
+You should use a site that is geographically close to you.
+
+@table @asis
+@item Asia:
+@table @code
+@item cair-archive.kaist.ac.kr:/pub/gnu
+@itemx ftp.cs.titech.ac.jp
+@itemx ftp.nectec.or.th:/pub/mirrors/gnu
+@itemx utsun.s.u-tokyo.ac.jp:/ftpsync/prep
+@end table
+
+@item Australia:
+@table @code
+@item archie.au:/gnu
+(@code{archie.oz} or @code{archie.oz.au} for ACSnet)
+@end table
+
+@item Africa:
+@table @code
+@item ftp.sun.ac.za:/pub/gnu
+@end table
+
+@item Middle East:
+@table @code
+@item ftp.technion.ac.il:/pub/unsupported/gnu
+@end table
+
+@item Europe:
+@table @code
+@item archive.eu.net
+@itemx ftp.denet.dk
+@itemx ftp.eunet.ch
+@itemx ftp.funet.fi:/pub/gnu
+@itemx ftp.ieunet.ie:pub/gnu
+@itemx ftp.informatik.rwth-aachen.de:/pub/gnu
+@itemx ftp.informatik.tu-muenchen.de
+@itemx ftp.luth.se:/pub/unix/gnu
+@itemx ftp.mcc.ac.uk
+@itemx ftp.stacken.kth.se
+@itemx ftp.sunet.se:/pub/gnu
+@itemx ftp.univ-lyon1.fr:pub/gnu
+@itemx ftp.win.tue.nl:/pub/gnu
+@itemx irisa.irisa.fr:/pub/gnu
+@itemx isy.liu.se
+@itemx nic.switch.ch:/mirror/gnu
+@itemx src.doc.ic.ac.uk:/gnu
+@itemx unix.hensa.ac.uk:/pub/uunet/systems/gnu
+@end table
+
+@item South America:
+@table @code
+@item ftp.inf.utfsm.cl:/pub/gnu
+@itemx ftp.unicamp.br:/pub/gnu
+@end table
+
+@item Western Canada:
+@table @code
+@item ftp.cs.ubc.ca:/mirror2/gnu
+@end table
+
+@item USA:
+@table @code
+@item col.hp.com:/mirrors/gnu
+@itemx f.ms.uky.edu:/pub3/gnu
+@itemx ftp.cc.gatech.edu:/pub/gnu
+@itemx ftp.cs.columbia.edu:/archives/gnu/prep
+@itemx ftp.digex.net:/pub/gnu
+@itemx ftp.hawaii.edu:/mirrors/gnu
+@itemx ftp.kpc.com:/pub/mirror/gnu
+@end table
+
+@c NEEDED
+@page
+@item USA (continued):
+@table @code
+@item ftp.uu.net:/systems/gnu
+@itemx gatekeeper.dec.com:/pub/GNU
+@itemx jaguar.utah.edu:/gnustuff
+@itemx labrea.stanford.edu
+@itemx mrcnext.cso.uiuc.edu:/pub/gnu
+@itemx vixen.cso.uiuc.edu:/gnu
+@itemx wuarchive.wustl.edu:/systems/gnu
+@end table
+@end table
+@end enumerate
+
+@node Extracting, Distribution contents, Getting, Gawk Distribution
+@appendixsubsec Extracting the Distribution
+@code{gawk} is distributed as a @code{tar} file compressed with the
+GNU Zip program, @code{gzip}.
+
+Once you have the distribution (for example,
+@file{gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz}), first use @code{gzip} to expand the
+file, and then use @code{tar} to extract it. You can use the following
+pipeline to produce the @code{gawk} distribution:
+
+@example
+# Under System V, add 'o' to the tar flags
+gzip -d -c gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz | tar -xvpf -
+@end example
+
+@noindent
+This will create a directory named @file{gawk-@value{VERSION}.@value{PATCHLEVEL}} in the current
+directory.
+
+The distribution file name is of the form
+@file{gawk-@var{V}.@var{R}.@var{n}.tar.gz}.
+The @var{V} represents the major version of @code{gawk},
+the @var{R} represents the current release of version @var{V}, and
+the @var{n} represents a @dfn{patch level}, meaning that minor bugs have
+been fixed in the release. The current patch level is @value{PATCHLEVEL},
+but when
+retrieving distributions, you should get the version with the highest
+version, release, and patch level. (Note that release levels greater than
+or equal to 90 denote ``beta,'' or non-production software; you may not wish
+to retrieve such a version unless you don't mind experimenting.)
+
+If you are not on a Unix system, you will need to make other arrangements
+for getting and extracting the @code{gawk} distribution. You should consult
+a local expert.
+
+@node Distribution contents, , Extracting, Gawk Distribution
+@appendixsubsec Contents of the @code{gawk} Distribution
+
+The @code{gawk} distribution has a number of C source files,
+documentation files,
+subdirectories and files related to the configuration process
+(@pxref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}),
+and several subdirectories related to different, non-Unix,
+operating systems.
+
+@table @asis
+@item various @samp{.c}, @samp{.y}, and @samp{.h} files
+These files are the actual @code{gawk} source code.
+@end table
+
+@table @file
+@item README
+@itemx README_d/README.*
+Descriptive files: @file{README} for @code{gawk} under Unix, and the
+rest for the various hardware and software combinations.
+
+@item INSTALL
+A file providing an overview of the configuration and installation process.
+
+@item PORTS
+A list of systems to which @code{gawk} has been ported, and which
+have successfully run the test suite.
+
+@item ACKNOWLEDGMENT
+A list of the people who contributed major parts of the code or documentation.
+
+@item ChangeLog
+A detailed list of source code changes as bugs are fixed or improvements made.
+
+@item NEWS
+A list of changes to @code{gawk} since the last release or patch.
+
+@item COPYING
+The GNU General Public License.
+
+@item FUTURES
+A brief list of features and/or changes being contemplated for future
+releases, with some indication of the time frame for the feature, based
+on its difficulty.
+
+@item LIMITATIONS
+A list of those factors that limit @code{gawk}'s performance.
+Most of these depend on the hardware or operating system software, and
+are not limits in @code{gawk} itself.
+
+@item POSIX.STD
+A description of one area where the POSIX standard for @code{awk} is
+incorrect, and how @code{gawk} handles the problem.
+
+@item PROBLEMS
+A file describing known problems with the current release.
+
+@cindex artificial intelligence, using @code{gawk}
+@cindex AI programming, using @code{gawk}
+@item doc/awkforai.txt
+A short article describing why @code{gawk} is a good language for
+AI (Artificial Intelligence) programming.
+
+@item doc/README.card
+@itemx doc/ad.block
+@itemx doc/awkcard.in
+@itemx doc/cardfonts
+@itemx doc/colors
+@itemx doc/macros
+@itemx doc/no.colors
+@itemx doc/setter.outline
+The @code{troff} source for a five-color @code{awk} reference card.
+A modern version of @code{troff}, such as GNU Troff (@code{groff}) is
+needed to produce the color version. See the file @file{README.card}
+for instructions if you have an older @code{troff}.
+
+@item doc/gawk.1
+The @code{troff} source for a manual page describing @code{gawk}.
+This is distributed for the convenience of Unix users.
+
+@item doc/gawk.texi
+The Texinfo source file for this @value{DOCUMENT}.
+It should be processed with @TeX{} to produce a printed document, and
+with @code{makeinfo} to produce an Info file.
+
+@item doc/gawk.info
+The generated Info file for this @value{DOCUMENT}.
+
+@item doc/igawk.1
+The @code{troff} source for a manual page describing the @code{igawk}
+program presented in
+@ref{Igawk Program, ,An Easy Way to Use Library Functions}.
+
+@item doc/Makefile.in
+The input file used during the configuration process to generate the
+actual @file{Makefile} for creating the documentation.
+
+@item Makefile.in
+@itemx acconfig.h
+@itemx aclocal.m4
+@itemx configh.in
+@itemx configure.in
+@itemx configure
+@itemx custom.h
+@itemx missing/*
+These files and the @file{missing} subdirectory are used when configuring @code{gawk}
+for various Unix systems. They are explained in detail in
+@ref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}.
+
+@item awklib/extract.awk
+@itemx awklib/Makefile.in
+The @file{awklib} directory contains a copy of @file{extract.awk}
+(@pxref{Extract Program, ,Extracting Programs from Texinfo Source Files}),
+which can be used to extract the sample programs from the Texinfo
+source file for this @value{DOCUMENT}, and a @file{Makefile.in} file, which
+@code{configure} uses to generate a @file{Makefile}.
+As part of the process of building @code{gawk}, the library functions from
+@ref{Library Functions, , A Library of @code{awk} Functions},
+and the @code{igawk} program from
+@ref{Igawk Program, , An Easy Way to Use Library Functions},
+are extracted into ready to use files.
+They are installed as part of the installation process.
+
+@item atari/*
+Files needed for building @code{gawk} on an Atari ST.
+@xref{Atari Installation, ,Installing @code{gawk} on the Atari ST}, for details.
+
+@item pc/*
+Files needed for building @code{gawk} under MS-DOS and OS/2.
+@xref{PC Installation, ,MS-DOS and OS/2 Installation and Compilation}, for details.
+
+@item vms/*
+Files needed for building @code{gawk} under VMS.
+@xref{VMS Installation, ,How to Compile and Install @code{gawk} on VMS}, for details.
+
+@item test/*
+A test suite for
+@code{gawk}. You can use @samp{make check} from the top level @code{gawk}
+directory to run your version of @code{gawk} against the test suite.
+If @code{gawk} successfully passes @samp{make check} then you can
+be confident of a successful port.
+@end table
+
+@node Unix Installation, VMS Installation, Gawk Distribution, Installation
+@appendixsec Compiling and Installing @code{gawk} on Unix
+
+Usually, you can compile and install @code{gawk} by typing only two
+commands. However, if you do use an unusual system, you may need
+to configure @code{gawk} for your system yourself.
+
+@menu
+* Quick Installation:: Compiling @code{gawk} under Unix.
+* Configuration Philosophy:: How it's all supposed to work.
+@end menu
+
+@node Quick Installation, Configuration Philosophy, Unix Installation, Unix Installation
+@appendixsubsec Compiling @code{gawk} for Unix
+
+@cindex installation, unix
+After you have extracted the @code{gawk} distribution, @code{cd}
+to @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}. Like most GNU software,
+@code{gawk} is configured
+automatically for your Unix system by running the @code{configure} program.
+This program is a Bourne shell script that was generated automatically using
+GNU @code{autoconf}.
+@iftex
+(The @code{autoconf} software is
+described fully in
+@cite{Autoconf---Generating Automatic Configuration Scripts},
+which is available from the Free Software Foundation.)
+@end iftex
+@ifinfo
+(The @code{autoconf} software is described fully starting with
+@ref{Top, , Introduction, autoconf, Autoconf---Generating Automatic Configuration Scripts}.)
+@end ifinfo
+
+To configure @code{gawk}, simply run @code{configure}:
+
+@example
+sh ./configure
+@end example
+
+This produces a @file{Makefile} and @file{config.h} tailored to your system.
+The @file{config.h} file describes various facts about your system.
+You may wish to edit the @file{Makefile} to
+change the @code{CFLAGS} variable, which controls
+the command line options that are passed to the C compiler (such as
+optimization levels, or compiling for debugging).
+
+Alternatively, you can add your own values for most @code{make}
+variables, such as @code{CC} and @code{CFLAGS}, on the command line when
+running @code{configure}:
+
+@example
+CC=cc CFLAGS=-g sh ./configure
+@end example
+
+@noindent
+See the file @file{INSTALL} in the @code{gawk} distribution for
+all the details.
+
+After you have run @code{configure}, and possibly edited the @file{Makefile},
+type:
+
+@example
+make
+@end example
+
+@noindent
+and shortly thereafter, you should have an executable version of @code{gawk}.
+That's all there is to it!
+(If these steps do not work, please send in a bug report;
+@pxref{Bugs, ,Reporting Problems and Bugs}.)
+
+@node Configuration Philosophy, , Quick Installation, Unix Installation
+@appendixsubsec The Configuration Process
+
+@cindex configuring @code{gawk}
+(This section is of interest only if you know something about using the
+C language and the Unix operating system.)
+
+The source code for @code{gawk} generally attempts to adhere to formal
+standards wherever possible. This means that @code{gawk} uses library
+routines that are specified by the ANSI C standard and by the POSIX
+operating system interface standard. When using an ANSI C compiler,
+function prototypes are used to help improve the compile-time checking.
+
+Many Unix systems do not support all of either the ANSI or the
+POSIX standards. The @file{missing} subdirectory in the @code{gawk}
+distribution contains replacement versions of those subroutines that are
+most likely to be missing.
+
+The @file{config.h} file that is created by the @code{configure} program
+contains definitions that describe features of the particular operating
+system where you are attempting to compile @code{gawk}. The three things
+described by this file are what header files are available, so that
+they can be correctly included,
+what (supposedly) standard functions are actually available in your C
+libraries, and
+other miscellaneous facts about your
+variant of Unix. For example, there may not be an @code{st_blksize}
+element in the @code{stat} structure. In this case @samp{HAVE_ST_BLKSIZE}
+would be undefined.
+
+@cindex @code{custom.h} configuration file
+It is possible for your C compiler to lie to @code{configure}. It may
+do so by not exiting with an error when a library function is not
+available. To get around this, you can edit the file @file{custom.h}.
+Use an @samp{#ifdef} that is appropriate for your system, and either
+@code{#define} any constants that @code{configure} should have defined but
+didn't, or @code{#undef} any constants that @code{configure} defined and
+should not have. @file{custom.h} is automatically included by
+@file{config.h}.
+
+It is also possible that the @code{configure} program generated by
+@code{autoconf}
+will not work on your system in some other fashion. If you do have a problem,
+the file
+@file{configure.in} is the input for @code{autoconf}. You may be able to
+change this file, and generate a new version of @code{configure} that will
+work on your system. @xref{Bugs, ,Reporting Problems and Bugs}, for
+information on how to report problems in configuring @code{gawk}. The same
+mechanism may be used to send in updates to @file{configure.in} and/or
+@file{custom.h}.
+
+@node VMS Installation, PC Installation, Unix Installation, Installation
+@appendixsec How to Compile and Install @code{gawk} on VMS
+
+@c based on material from Pat Rankin <rankin@eql.caltech.edu>
+
+@cindex installation, vms
+This section describes how to compile and install @code{gawk} under VMS.
+
+@menu
+* VMS Compilation:: How to compile @code{gawk} under VMS.
+* VMS Installation Details:: How to install @code{gawk} under VMS.
+* VMS Running:: How to run @code{gawk} under VMS.
+* VMS POSIX:: Alternate instructions for VMS POSIX.
+@end menu
+
+@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation
+@appendixsubsec Compiling @code{gawk} on VMS
+
+To compile @code{gawk} under VMS, there is a @code{DCL} command procedure that
+will issue all the necessary @code{CC} and @code{LINK} commands, and there is
+also a @file{Makefile} for use with the @code{MMS} utility. From the source
+directory, use either
+
+@example
+$ @@[.VMS]VMSBUILD.COM
+@end example
+
+@noindent
+or
+
+@example
+$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK
+@end example
+
+Depending upon which C compiler you are using, follow one of the sets
+of instructions in this table:
+
+@table @asis
+@item VAX C V3.x
+Use either @file{vmsbuild.com} or @file{descrip.mms} as is. These use
+@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0.
+
+@item VAX C V2.x
+You must have Version 2.3 or 2.4; older ones won't work. Edit either
+@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them.
+For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters.
+Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h})
+and comment out or delete the two lines @samp{#define __STDC__ 0} and
+@samp{#define VAXC_BUILTINS} near the end.
+
+@item GNU C
+Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different
+from those for VAX C V2.x, but equally straightforward. No changes to
+@file{config.h} should be needed.
+
+@item DEC C
+Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments.
+No changes to @file{config.h} should be needed.
+@end table
+
+@code{gawk} has been tested under VAX/VMS 5.5-1 using VAX C V3.2,
+GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6 and up.
+
+@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation
+@appendixsubsec Installing @code{gawk} on VMS
+
+To install @code{gawk}, all you need is a ``foreign'' command, which is
+a @code{DCL} symbol whose value begins with a dollar sign. For example:
+
+@example
+$ GAWK :== $disk1:[gnubin]GAWK
+@end example
+
+@noindent
+(Substitute the actual location of @code{gawk.exe} for
+@samp{$disk1:[gnubin]}.) The symbol should be placed in the
+@file{login.com} of any user who wishes to run @code{gawk},
+so that it will be defined every time the user logs on.
+Alternatively, the symbol may be placed in the system-wide
+@file{sylogin.com} procedure, which will allow all users
+to run @code{gawk}.
+
+Optionally, the help entry can be loaded into a VMS help library:
+
+@example
+$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
+@end example
+
+@noindent
+(You may want to substitute a site-specific help library rather than
+the standard VMS library @samp{HELPLIB}.) After loading the help text,
+
+@example
+$ HELP GAWK
+@end example
+
+@noindent
+will provide information about both the @code{gawk} implementation and the
+@code{awk} programming language.
+
+The logical name @samp{AWK_LIBRARY} can designate a default location
+for @code{awk} program files. For the @samp{-f} option, if the specified
+filename has no device or directory path information in it, @code{gawk}
+will look in the current directory first, then in the directory specified
+by the translation of @samp{AWK_LIBRARY} if the file was not found.
+If after searching in both directories, the file still is not found,
+then @code{gawk} appends the suffix @samp{.awk} to the filename and the
+file search will be re-tried. If @samp{AWK_LIBRARY} is not defined, that
+portion of the file search will fail benignly.
+
+@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation
+@appendixsubsec Running @code{gawk} on VMS
+
+Command line parsing and quoting conventions are significantly different
+on VMS, so examples in this @value{DOCUMENT} or from other sources often need minor
+changes. They @emph{are} minor though, and all @code{awk} programs
+should run correctly.
+
+Here are a couple of trivial tests:
+
+@example
+$ gawk -- "BEGIN @{print ""Hello, World!""@}"
+$ gawk -"W" version
+! could also be -"W version" or "-W version"
+@end example
+
+@noindent
+Note that upper-case and mixed-case text must be quoted.
+
+The VMS port of @code{gawk} includes a @code{DCL}-style interface in addition
+to the original shell-style interface (see the help entry for details).
+One side-effect of dual command line parsing is that if there is only a
+single parameter (as in the quoted string program above), the command
+becomes ambiguous. To work around this, the normally optional @samp{--}
+flag is required to force Unix style rather than @code{DCL} parsing. If any
+other dash-type options (or multiple parameters such as data files to be
+processed) are present, there is no ambiguity and @samp{--} can be omitted.
+
+The default search path when looking for @code{awk} program files specified
+by the @samp{-f} option is @code{"SYS$DISK:[],AWK_LIBRARY:"}. The logical
+name @samp{AWKPATH} can be used to override this default. The format
+of @samp{AWKPATH} is a comma-separated list of directory specifications.
+When defining it, the value should be quoted so that it retains a single
+translation, and not a multi-translation @code{RMS} searchlist.
+
+@node VMS POSIX, , VMS Running, VMS Installation
+@appendixsubsec Building and Using @code{gawk} on VMS POSIX
+
+Ignore the instructions above, although @file{vms/gawk.hlp} should still
+be made available in a help library. The source tree should be unpacked
+into a container file subsystem rather than into the ordinary VMS file
+system. Make sure that the two scripts, @file{configure} and
+@file{vms/posix-cc.sh}, are executable; use @samp{chmod +x} on them if
+necessary. Then execute the following two commands:
+
+@example
+@group
+psx> CC=vms/posix-cc.sh configure
+psx> make CC=c89 gawk
+@end group
+@end example
+
+@noindent
+The first command will construct files @file{config.h} and @file{Makefile} out
+of templates, using a script to make the C compiler fit @code{configure}'s
+expectations. The second command will compile and link @code{gawk} using
+the C compiler directly; ignore any warnings from @code{make} about being
+unable to redefine @code{CC}. @code{configure} will take a very long
+time to execute, but at least it provides incremental feedback as it
+runs.
+
+This has been tested with VAX/VMS V6.2, VMS POSIX V2.0, and DEC C V5.2.
+
+Once built, @code{gawk} will work like any other shell utility. Unlike
+the normal VMS port of @code{gawk}, no special command line manipulation is
+needed in the VMS POSIX environment.
+
+@c Rewritten by Scott Deifik <scottd@amgen.com>
+@c and Darrel Hankerson <hankedr@mail.auburn.edu>
+@node PC Installation, Atari Installation, VMS Installation, Installation
+@appendixsec MS-DOS and OS/2 Installation and Compilation
+
+@cindex installation, MS-DOS and OS/2
+If you have received a binary distribution prepared by the DOS
+maintainers, then @code{gawk} and the necessary support files will appear
+under the @file{gnu} directory, with executables in @file{gnu/bin},
+libraries in @file{gnu/lib/awk}, and manual pages under @file{gnu/man}.
+This is designed for easy installation to a @file{/gnu} directory on your
+drive, but the files can be installed anywhere provided @code{AWKPATH} is
+set properly. Regardless of the installation directory, the first line of
+@file{igawk.cmd} and @file{igawk.bat} (in @file{gnu/bin}) may need to be
+edited.
+
+The binary distribution will contain a separate file describing the
+contents. In particular, it may include more than one version of the
+@code{gawk} executable. OS/2 binary distributions may have a
+different arrangement, but installation is similar.
+
+The OS/2 and MS-DOS versions of @code{gawk} search for program files as
+described in @ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+However, semicolons (rather than colons) separate elements
+in the @code{AWKPATH} variable. If @code{AWKPATH} is not set or is empty,
+then the default search path is @code{@w{".;c:/lib/awk;c:/gnu/lib/awk"}}.
+
+An @code{sh}-like shell (as opposed to @code{command.com} under MS-DOS
+or @code{cmd.exe} under OS/2) may be useful for @code{awk} programming.
+Ian Stewartson has written an excellent shell for MS-DOS and OS/2, and a
+@code{ksh} clone and GNU Bash are available for OS/2. The file
+@file{README_d/README.pc} in the @code{gawk} distribution contains
+information on these shells. Users of Stewartson's shell on DOS should
+examine its documentation on handling of command-lines. In particular,
+the setting for @code{gawk} in the shell configuration may need to be
+changed, and the @code{ignoretype} option may also be of interest.
+
+@code{gawk} can be compiled for MS-DOS and OS/2 using the GNU development tools
+from DJ Delorie (DJGPP, MS-DOS-only) or Eberhard Mattes (EMX, MS-DOS and OS/2).
+Microsoft C can be used to build 16-bit versions for MS-DOS and OS/2. The file
+@file{README_d/README.pc} in the @code{gawk} distribution contains additional
+notes, and @file{pc/Makefile} contains important notes on compilation options.
+
+To build @code{gawk}, copy the files in the @file{pc} directory (@emph{except}
+for @file{ChangeLog}) to the
+directory with the rest of the @code{gawk} sources. The @file{Makefile}
+contains a configuration section with comments, and may need to be
+edited in order to work with your @code{make} utility.
+
+The @file{Makefile} contains a number of targets for building various MS-DOS
+and OS/2 versions. A list of targets will be printed if the @code{make}
+command is given without a target. As an example, to build @code{gawk}
+using the DJGPP tools, enter @samp{make djgpp}.
+
+Using @code{make} to run the standard tests and to install @code{gawk}
+requires additional Unix-like tools, including @code{sh}, @code{sed}, and
+@code{cp}. In order to run the tests, the @file{test/*.ok} files may need to
+be converted so that they have the usual DOS-style end-of-line markers. Most
+of the tests will work properly with Stewartson's shell along with the
+companion utilities or appropriate GNU utilities. However, some editing of
+@file{test/Makefile} is required. It is recommended that the file
+@file{pc/Makefile.tst} be copied to @file{test/Makefile} as a
+replacement. Details can be found in @file{README_d/README.pc}.
+
+@node Atari Installation, Amiga Installation, PC Installation, Installation
+@appendixsec Installing @code{gawk} on the Atari ST
+
+@c based on material from Michal Jaegermann <michal@gortel.phys.ualberta.ca>
+
+@cindex atari
+@cindex installation, atari
+There are no substantial differences when installing @code{gawk} on
+various Atari models. Compiled @code{gawk} executables do not require
+a large amount of memory with most @code{awk} programs and should run on all
+Motorola processor-based models (referred to below as ST, even if that
+is not exactly right).
+
+In order to use @code{gawk}, you need to have a shell, either text or
+graphics, that does not map all the characters of a command line to
+upper-case. Maintaining case distinction in option flags is very
+important (@pxref{Options, ,Command Line Options}).
+These days this is the default, and it may only be a problem for some
+very old machines. If your system does not preserve the case of option
+flags, you will need to upgrade your tools. Support for I/O
+redirection is necessary to make it easy to import @code{awk} programs
+from other environments. Pipes are nice to have, but not vital.
+
+@menu
+* Atari Compiling:: Compiling @code{gawk} on Atari
+* Atari Using:: Running @code{gawk} on Atari
+@end menu
+
+@node Atari Compiling, Atari Using, Atari Installation, Atari Installation
+@appendixsubsec Compiling @code{gawk} on the Atari ST
+
+A proper compilation of @code{gawk} sources when @code{sizeof(int)}
+differs from @code{sizeof(void *)} requires an ANSI C compiler. An initial
+port was done with @code{gcc}. You may actually prefer executables
+where @code{int}s are four bytes wide, but the other variant works as well.
+
+You may need quite a bit of memory when trying to recompile the @code{gawk}
+sources, as some source files (@file{regex.c} in particular) are quite
+big. If you run out of memory compiling such a file, try reducing the
+optimization level for this particular file; this may help.
+
+@cindex Linux
+With a reasonable shell (Bash will do), and in particular if you run
+Linux, MiNT or a similar operating system, you have a pretty good
+chance that the @code{configure} utility will succeed. Otherwise
+sample versions of @file{config.h} and @file{Makefile.st} are given in the
+@file{atari} subdirectory and can be edited and copied to the
+corresponding files in the main source directory. Even if
+@code{configure} produced something, it might be advisable to compare
+its results with the sample versions and possibly make adjustments.
+
+Some @code{gawk} source code fragments depend on a preprocessor define
+@samp{atarist}. This basically assumes the TOS environment with @code{gcc}.
+Modify these sections as appropriate if they are not right for your
+environment. Also see the remarks about @code{AWKPATH} and @code{envsep} in
+@ref{Atari Using, ,Running @code{gawk} on the Atari ST}.
+
+As shipped, the sample @file{config.h} claims that the @code{system}
+function is missing from the libraries, which is not true, and an
+alternative implementation of this function is provided in
+@file{atari/system.c}. Depending upon your particular combination of
+shell and operating system, you may wish to change the file to indicate
+that @code{system} is available.
+
+@node Atari Using, , Atari Compiling, Atari Installation
+@appendixsubsec Running @code{gawk} on the Atari ST
+
+An executable version of @code{gawk} should be placed, as usual,
+anywhere in your @code{PATH} where your shell can find it.
+
+While executing, @code{gawk} creates a number of temporary files. When
+using @code{gcc} libraries for TOS, @code{gawk} looks for either of
+the environment variables @code{TEMP} or @code{TMPDIR}, in that order.
+If either one is found, its value is assumed to be a directory for
+temporary files. This directory must exist, and if you can spare the
+memory, it is a good idea to put it on a RAM drive. If neither
+@code{TEMP} nor @code{TMPDIR} is found, then @code{gawk} uses the
+current directory for its temporary files.
+
+The ST version of @code{gawk} searches for its program files as described in
+@ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+The default value for the @code{AWKPATH} variable is taken from
+@code{DEFPATH} defined in @file{Makefile}. The sample @code{gcc}/TOS
+@file{Makefile} for the ST in the distribution sets @code{DEFPATH} to
+@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}. The search path can be
+modified by explicitly setting @code{AWKPATH} to whatever you wish.
+Note that colons cannot be used on the ST to separate elements in the
+@code{AWKPATH} variable, since they have another, reserved, meaning.
+Instead, you must use a comma to separate elements in the path. When
+recompiling, the separating character can be modified by initializing
+the @code{envsep} variable in @file{atari/gawkmisc.atr} to another
+value.
+
+Although @code{awk} allows great flexibility in doing I/O redirections
+from within a program, this facility should be used with care on the ST
+running under TOS. In some circumstances the OS routines for file
+handle pool processing lose track of certain events, causing the
+computer to crash, and requiring a reboot. Often a warm reboot is
+sufficient. Fortunately, this happens infrequently, and in rather
+esoteric situations. In particular, avoid having one part of an
+@code{awk} program using @code{print} statements explicitly redirected
+to @code{"/dev/stdout"}, while other @code{print} statements use the
+default standard output, and a calling shell has redirected standard
+output to a file.
+
+When @code{gawk} is compiled with the ST version of @code{gcc} and its
+usual libraries, it will accept both @samp{/} and @samp{\} as path separators.
+While this is convenient, it should be remembered that this removes one,
+technically valid, character (@samp{/}) from your file names, and that
+it may create problems for external programs, called via the @code{system}
+function, which may not support this convention. Whenever it is possible
+that a file created by @code{gawk} will be used by some other program,
+use only backslashes. Also remember that in @code{awk}, backslashes in
+strings have to be doubled in order to get literal backslashes
+(@pxref{Escape Sequences}).
+
+@node Amiga Installation, Bugs, Atari Installation, Installation
+@appendixsec Installing @code{gawk} on an Amiga
+
+@cindex amiga
+@cindex installation, amiga
+You can install @code{gawk} on an Amiga system using a Unix emulation
+environment available via anonymous @code{ftp} from
+@code{ftp.ninemoons.com} in the directory @file{pub/ade/current}.
+This includes a shell based on @code{pdksh}. The primary component of
+this environment is a Unix emulation library, @file{ixemul.lib}.
+@c could really use more background here, who wrote this, etc.
+
+A more complete distribution for the Amiga is available on
+the Geek Gadgets CD-ROM from:
+
+@quotation
+CRONUS @*
+1840 E. Warner Road #105-265 @*
+Tempe, AZ 85284 USA @*
+US Toll Free: (800) 804-0833 @*
+Phone: +1-602-491-0442 @*
+FAX: +1-602-491-0048 @*
+Email: @code{info@@ninemoons.com} @*
+WWW: @code{http://www.ninemoons.com} @*
+Anonymous @code{ftp} site: @code{ftp.ninemoons.com} @*
+@end quotation
+
+Once you have the distribution, you can configure @code{gawk} simply by
+running @code{configure}:
+
+@example
+configure -v m68k-amigaos
+@end example
+
+Then run @code{make}, and you should be all set!
+(If these steps do not work, please send in a bug report;
+@pxref{Bugs, ,Reporting Problems and Bugs}.)
+
+@node Bugs, Other Versions, Amiga Installation, Installation
+@appendixsec Reporting Problems and Bugs
+@display
+@i{There is nothing more dangerous than a bored archeologist.}
+The Hitchhiker's Guide to the Galaxy
+@c the radio show, not the book. :-)
+@end display
+@sp 1
+
+If you have problems with @code{gawk} or think that you have found a bug,
+please report it to the developers; we cannot promise to do anything
+but we might well want to fix it.
+
+Before reporting a bug, make sure you have actually found a real bug.
+Carefully reread the documentation and see if it really says you can do
+what you're trying to do. If it's not clear whether you should be able
+to do something or not, report that too; it's a bug in the documentation!
+
+Before reporting a bug or trying to fix it yourself, try to isolate it
+to the smallest possible @code{awk} program and input data file that
+reproduces the problem. Then send us the program and data file,
+some idea of what kind of Unix system you're using, and the exact results
+@code{gawk} gave you. Also say what you expected to occur; this will help
+us decide whether the problem was really in the documentation.
+
+Once you have a precise problem, there are two e-mail addresses you
+can send mail to.
+
+@table @asis
+@item Internet:
+@samp{bug-gnu-utils@@prep.ai.mit.edu}
+
+@item UUCP:
+@samp{uunet!prep.ai.mit.edu!bug-gnu-utils}
+@end table
+
+Please include the
+version number of @code{gawk} you are using. You can get this information
+with the command @samp{gawk --version}.
+You should send a carbon copy of your mail to Arnold Robbins, who can
+be reached at @samp{arnold@@gnu.ai.mit.edu}.
+
+@cindex @code{comp.lang.awk}
+@strong{Important!} Do @emph{not} try to report bugs in @code{gawk} by
+posting to the Usenet/Internet newsgroup @code{comp.lang.awk}.
+While the @code{gawk} developers do occasionally read this newsgroup,
+there is no guarantee that we will see your posting. The steps described
+above are the official, recognized ways for reporting bugs.
+
+Non-bug suggestions are always welcome as well. If you have questions
+about things that are unclear in the documentation or are just obscure
+features, ask Arnold Robbins; he will try to help you out, although he
+may not have the time to fix the problem. You can send him electronic
+mail at the Internet address above.
+
+If you find bugs in one of the non-Unix ports of @code{gawk}, please send
+an electronic mail message to the person who maintains that port. They
+are listed below, and also in the @file{README} file in the @code{gawk}
+distribution. Information in the @file{README} file should be considered
+authoritative if it conflicts with this @value{DOCUMENT}.
+
+@c NEEDED for looks
+@page
+The people maintaining the non-Unix ports of @code{gawk} are:
+
+@cindex Deifik, Scott
+@cindex Fish, Fred
+@cindex Hankerson, Darrel
+@cindex Jaegermann, Michal
+@cindex Rankin, Pat
+@cindex Rommel, Kai Uwe
+@table @asis
+@item MS-DOS
+Scott Deifik, @samp{scottd@@amgen.com}, and
+Darrel Hankerson, @samp{hankedr@@mail.auburn.edu}.
+
+@item OS/2
+Kai Uwe Rommel, @samp{rommel@@ars.de}.
+
+@item VMS
+Pat Rankin, @samp{rankin@@eql.caltech.edu}.
+
+@item Atari ST
+Michal Jaegermann, @samp{michal@@gortel.phys.ualberta.ca}.
+
+@item Amiga
+Fred Fish, @samp{fnf@@ninemoons.com}.
+@end table
+
+If your bug is also reproducible under Unix, please send copies of your
+report to the general GNU bug list, as well as to Arnold Robbins, at the
+addresses listed above.
+
+@node Other Versions, , Bugs, Installation
+@appendixsec Other Freely Available @code{awk} Implementations
+@cindex Brennan, Michael
+@ignore
+From: emory!amc.com!brennan (Michael Brennan)
+Subject: C++ comments in awk programs
+To: arnold@gnu.ai.mit.edu (Arnold Robbins)
+Date: Wed, 4 Sep 1996 08:11:48 -0700 (PDT)
+
+@end ignore
+@display
+@i{It's kind of fun to put comments like this in your awk code.}
+ @code{// Do C++ comments work? answer: yes! of course}
+Michael Brennan
+@end display
+@sp 1
+
+There are two other freely available @code{awk} implementations.
+This section briefly describes where to get them.
+
+@table @asis
+@cindex Kernighan, Brian
+@cindex anonymous @code{ftp}
+@cindex @code{ftp}, anonymous
+@item Unix @code{awk}
+Brian Kernighan has been able to make his implementation of
+@code{awk} freely available. You can get it via anonymous @code{ftp}
+to the host @code{@w{netlib.att.com}}. Change directory to
+@file{/netlib/research}. Use ``binary'' or ``image'' mode, and
+retrieve @file{awk.bundle.Z}.
+
+This is a shell archive that has been compressed with the @code{compress}
+utility. It can be uncompressed with either @code{uncompress} or the
+GNU @code{gunzip} utility.
+
+This version requires an ANSI C compiler; GCC (the GNU C compiler)
+works quite nicely.
+
+@cindex Brennan, Michael
+@cindex @code{mawk}
+@item @code{mawk}
+Michael Brennan has written an independent implementation of @code{awk},
+called @code{mawk}. It is available under the GPL
+(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}),
+just as @code{gawk} is.
+
+You can get it via anonymous @code{ftp} to the host
+@code{@w{ftp.whidbey.net}}. Change directory to @file{/pub/brennan}.
+Use ``binary'' or ``image'' mode, and retrieve @file{mawk1.3.3.tar.gz}
+(or the latest version that is there).
+
+@code{gunzip} may be used to decompress this file. Installation
+is similar to @code{gawk}'s
+(@pxref{Unix Installation, , Compiling and Installing @code{gawk} on Unix}).
+@end table
+
+@node Notes, Glossary, Installation, Top
+@appendix Implementation Notes
+
+This appendix contains information mainly of interest to implementors and
+maintainers of @code{gawk}. Everything in it applies specifically to
+@code{gawk}, and not to other implementations.
+
+@menu
+* Compatibility Mode:: How to disable certain @code{gawk} extensions.
+* Additions:: Making Additions To @code{gawk}.
+* Future Extensions:: New features that may be implemented one day.
+* Improvements:: Suggestions for improvements by volunteers.
+@end menu
+
+@node Compatibility Mode, Additions, Notes, Notes
+@appendixsec Downward Compatibility and Debugging
+
+@xref{POSIX/GNU, ,Extensions in @code{gawk} Not in POSIX @code{awk}},
+for a summary of the GNU extensions to the @code{awk} language and program.
+All of these features can be turned off by invoking @code{gawk} with the
+@samp{--traditional} option, or with the @samp{--posix} option.
+
+If @code{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
+is one more option available on the command line:
+
+@table @code
+@item -W parsedebug
+@itemx --parsedebug
+Print out the parse stack information as the program is being parsed.
+@end table
+
+This option is intended only for serious @code{gawk} developers,
+and not for the casual user. It probably has not even been compiled into
+your version of @code{gawk}, since it slows down execution.
+
+@node Additions, Future Extensions, Compatibility Mode, Notes
+@appendixsec Making Additions to @code{gawk}
+
+If you should find that you wish to enhance @code{gawk} in a significant
+fashion, you are perfectly free to do so. That is the point of having
+free software; the source code is available, and you are free to change
+it as you wish (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}).
+
+This section discusses the ways you might wish to change @code{gawk},
+and any considerations you should bear in mind.
+
+@menu
+* Adding Code:: Adding code to the main body of @code{gawk}.
+* New Ports:: Porting @code{gawk} to a new operating system.
+@end menu
+
+@node Adding Code, New Ports, Additions, Additions
+@appendixsubsec Adding New Features
+
+@cindex adding new features
+@cindex features, adding
+You are free to add any new features you like to @code{gawk}.
+However, if you want your changes to be incorporated into the @code{gawk}
+distribution, there are several steps that you need to take in order to
+make it possible for me to include your changes.
+
+@enumerate 1
+@item
+Get the latest version.
+It is much easier for me to integrate changes if they are relative to
+the most recent distributed version of @code{gawk}. If your version of
+@code{gawk} is very old, I may not be able to integrate them at all.
+@xref{Getting, ,Getting the @code{gawk} Distribution},
+for information on getting the latest version of @code{gawk}.
+
+@item
+@iftex
+Follow the @cite{GNU Coding Standards}.
+@end iftex
+@ifinfo
+See @inforef{Top, , Version, standards, GNU Coding Standards}.
+@end ifinfo
+This document describes how GNU software should be written. If you haven't
+read it, please do so, preferably @emph{before} starting to modify @code{gawk}.
+(The @cite{GNU Coding Standards} are available as part of the Autoconf
+distribution, from the FSF.)
+
+@cindex @code{gawk} coding style
+@cindex coding style used in @code{gawk}
+@item
+Use the @code{gawk} coding style.
+The C code for @code{gawk} follows the instructions in the
+@cite{GNU Coding Standards}, with minor exceptions. The code is formatted
+using the traditional ``K&R'' style, particularly as regards the placement
+of braces and the use of tabs. In brief, the coding rules for @code{gawk}
+are:
+
+@itemize @bullet
+@item
+Use old style (non-prototype) function headers when defining functions.
+
+@item
+Put the name of the function at the beginning of its own line.
+
+@item
+Put the return type of the function, even if it is @code{int}, on the
+line above the line with the name and arguments of the function.
+
+@item
+The declarations for the function arguments should not be indented.
+
+@item
+Put spaces around parentheses used in control structures
+(@code{if}, @code{while}, @code{for}, @code{do}, @code{switch}
+and @code{return}).
+
+@item
+Do not put spaces in front of parentheses used in function calls.
+
+@item
+Put spaces around all C operators, and after commas in function calls.
+
+@item
+Do not use the comma operator to produce multiple side-effects, except
+in @code{for} loop initialization and increment parts, and in macro bodies.
+
+@item
+Use real tabs for indenting, not spaces.
+
+@item
+Use the ``K&R'' brace layout style.
+
+@item
+Use comparisons against @code{NULL} and @code{'\0'} in the conditions of
+@code{if}, @code{while} and @code{for} statements, and in the @code{case}s
+of @code{switch} statements, instead of just the
+plain pointer or character value.
+
+@item
+Use the @code{TRUE}, @code{FALSE}, and @code{NULL} symbolic constants,
+and the character constant @code{'\0'} where appropriate, instead of @code{1}
+and @code{0}.
+
+@item
+Provide one-line descriptive comments for each function.
+
+@item
+Do not use @samp{#elif}. Many older Unix C compilers cannot handle it.
+
+@item
+Do not use the @code{alloca} function for allocating memory off the stack.
+Its use causes more portability trouble than the minor benefit of not having
+to free the storage. Instead, use @code{malloc} and @code{free}.
+@end itemize
+
+If I have to reformat your code to follow the coding style used in
+@code{gawk}, I may not bother.
+
+@item
+Be prepared to sign the appropriate paperwork.
+In order for the FSF to distribute your changes, you must either place
+those changes in the public domain, and submit a signed statement to that
+effect, or assign the copyright in your changes to the FSF.
+Both of these actions are easy to do, and @emph{many} people have done so
+already. If you have questions, please contact me
+(@pxref{Bugs, , Reporting Problems and Bugs}),
+or @code{gnu@@prep.ai.mit.edu}.
+
+@item
+Update the documentation.
+Along with your new code, please supply new sections and/or chapters
+for this @value{DOCUMENT}. If at all possible, please use real
+Texinfo, instead of just supplying unformatted ASCII text (although
+even that is better than no documentation at all).
+Conventions to be followed in @cite{@value{TITLE}} are provided
+after the @samp{@@bye} at the end of the Texinfo source file.
+If possible, please update the man page as well.
+
+You will also have to sign paperwork for your documentation changes.
+
+@item
+Submit changes as context diffs or unified diffs.
+Use @samp{diff -c -r -N} or @samp{diff -u -r -N} to compare
+the original @code{gawk} source tree with your version.
+(I find context diffs to be more readable, but unified diffs are
+more compact.)
+I recommend using the GNU version of @code{diff}.
+Send the output produced by either run of @code{diff} to me when you
+submit your changes.
+@xref{Bugs, , Reporting Problems and Bugs}, for the electronic mail
+information.
+
+Using this format makes it easy for me to apply your changes to the
+master version of the @code{gawk} source code (using @code{patch}).
+If I have to apply the changes manually, using a text editor, I may
+not do so, particularly if there are lots of changes.
+@end enumerate
+
+Although this sounds like a lot of work, please remember that while you
+may write the new code, I have to maintain it and support it, and if it
+isn't possible for me to do that with a minimum of extra work, then I
+probably will not.
+
+@node New Ports, , Adding Code, Additions
+@appendixsubsec Porting @code{gawk} to a New Operating System
+
+@cindex porting @code{gawk}
+If you wish to port @code{gawk} to a new operating system, there are
+several steps to follow.
+
+@enumerate 1
+@item
+Follow the guidelines in
+@ref{Adding Code, ,Adding New Features},
+concerning coding style, submission of diffs, and so on.
+
+@item
+When doing a port, bear in mind that your code must co-exist peacefully
+with the rest of @code{gawk}, and the other ports. Avoid gratuitous
+changes to the system-independent parts of the code. If at all possible,
+avoid sprinkling @samp{#ifdef}s just for your port throughout the
+code.
+
+If the changes needed for a particular system affect too much of the
+code, I probably will not accept them. In such a case, you will, of course,
+be able to distribute your changes on your own, as long as you comply
+with the GPL
+(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}).
+
+@item
+A number of the files that come with @code{gawk} are maintained by other
+people at the Free Software Foundation. Thus, you should not change them
+unless it is for a very good reason. I.e.@: changes are not out of the
+question, but changes to these files will be scrutinized extra carefully.
+The files are @file{alloca.c}, @file{getopt.h}, @file{getopt.c},
+@file{getopt1.c}, @file{regex.h}, @file{regex.c}, @file{dfa.h},
+@file{dfa.c}, @file{install-sh}, and @file{mkinstalldirs}.
+
+@item
+Be willing to continue to maintain the port.
+Non-Unix operating systems are supported by volunteers who maintain
+the code needed to compile and run @code{gawk} on their systems. If no one
+volunteers to maintain a port, that port becomes unsupported, and it may
+be necessary to remove it from the distribution.
+
+@item
+Supply an appropriate @file{gawkmisc.???} file.
+Each port has its own @file{gawkmisc.???} that implements certain
+operating system specific functions. This is cleaner than a plethora of
+@samp{#ifdef}s scattered throughout the code. The @file{gawkmisc.c} in
+the main source directory includes the appropriate
+@file{gawkmisc.???} file from each subdirectory.
+Be sure to update it as well.
+
+Each port's @file{gawkmisc.???} file has a suffix reminiscent of the machine
+or operating system for the port. For example, @file{pc/gawkmisc.pc} and
+@file{vms/gawkmisc.vms}. The use of separate suffixes, instead of plain
+@file{gawkmisc.c}, makes it possible to move files from a port's subdirectory
+into the main subdirectory, without accidentally destroying the real
+@file{gawkmisc.c} file. (Currently, this is only an issue for the MS-DOS
+and OS/2 ports.)
+
+@item
+Supply a @file{Makefile} and any other C source and header files that are
+necessary for your operating system. All your code should be in a
+separate subdirectory, with a name that is the same as, or reminiscent
+of, either your operating system or the computer system. If possible,
+try to structure things so that it is not necessary to move files out
+of the subdirectory into the main source directory. If that is not
+possible, then be sure to avoid using names for your files that
+duplicate the names of files in the main source directory.
+
+@item
+Update the documentation.
+Please write a section (or sections) for this @value{DOCUMENT} describing the
+installation and compilation steps needed to install and/or compile
+@code{gawk} for your system.
+
+@item
+Be prepared to sign the appropriate paperwork.
+In order for the FSF to distribute your code, you must either place
+your code in the public domain, and submit a signed statement to that
+effect, or assign the copyright in your code to the FSF.
+@ifinfo
+Both of these actions are easy to do, and @emph{many} people have done so
+already. If you have questions, please contact me, or
+@code{gnu@@prep.ai.mit.edu}.
+@end ifinfo
+@end enumerate
+
+Following these steps will make it much easier to integrate your changes
+into @code{gawk}, and have them co-exist happily with the code for other
+operating systems that is already there.
+
+In the code that you supply, and that you maintain, feel free to use a
+coding style and brace layout that suits your taste.
+
+@node Future Extensions, Improvements, Additions, Notes
+@appendixsec Probable Future Extensions
+@ignore
+From emory!scalpel.netlabs.com!lwall Tue Oct 31 12:43:17 1995
+Return-Path: <emory!scalpel.netlabs.com!lwall>
+Message-Id: <9510311732.AA28472@scalpel.netlabs.com>
+To: arnold@skeeve.atl.ga.us (Arnold D. Robbins)
+Subject: Re: May I quote you?
+In-Reply-To: Your message of "Tue, 31 Oct 95 09:11:00 EST."
+ <m0tAHPQ-00014MC@skeeve.atl.ga.us>
+Date: Tue, 31 Oct 95 09:32:46 -0800
+From: Larry Wall <emory!scalpel.netlabs.com!lwall>
+
+: Greetings. I am working on the release of gawk 3.0. Part of it will be a
+: thoroughly updated manual. One of the sections deals with planned future
+: extensions and enhancements. I have the following at the beginning
+: of it:
+:
+: @cindex PERL
+: @cindex Wall, Larry
+: @display
+: @i{AWK is a language similar to PERL, only considerably more elegant.} @*
+: Arnold Robbins
+: @sp 1
+: @i{Hey!} @*
+: Larry Wall
+: @end display
+:
+: Before I actually release this for publication, I wanted to get your
+: permission to quote you. (Hopefully, in the spirit of much of GNU, the
+: implied humor is visible... :-)
+
+I think that would be fine.
+
+Larry
+@end ignore
+@cindex PERL
+@cindex Wall, Larry
+@display
+@i{AWK is a language similar to PERL, only considerably more elegant.}
+Arnold Robbins
+
+@i{Hey!}
+Larry Wall
+@end display
+@sp 1
+
+This section briefly lists extensions and possible improvements
+that indicate the directions we are
+currently considering for @code{gawk}. The file @file{FUTURES} in the
+@code{gawk} distribution lists these extensions as well.
+
+This is a list of probable future changes that will be usable by the
+@code{awk} language programmer.
+
+@c these are ordered by likelihood
+@table @asis
+@item Localization
+The GNU project is starting to support multiple languages.
+It will at least be possible to make @code{gawk} print its warnings and
+error messages in languages other than English.
+It may be possible for @code{awk} programs to also use the multiple
+language facilities, separate from @code{gawk} itself.
+
+@item Databases
+It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array.
+
+@item A @code{PROCINFO} Array
+The special files that provide process-related information
+(@pxref{Special Files, ,Special File Names in @code{gawk}})
+may be superseded by a @code{PROCINFO} array that would provide the same
+information, in an easier to access fashion.
+
+@item More @code{lint} warnings
+There are more things that could be checked for portability.
+
+@item Control of subprocess environment
+Changes made in @code{gawk} to the array @code{ENVIRON} may be
+propagated to subprocesses run by @code{gawk}.
+
+@ignore
+@item @code{RECLEN} variable for fixed length records
+Along with @code{FIELDWIDTHS}, this would speed up the processing of
+fixed-length records.
+
+@item A @code{restart} keyword
+After modifying @code{$0}, @code{restart} would restart the pattern
+matching loop, without reading a new record from the input.
+
+@item A @samp{|&} redirection
+The @samp{|&} redirection, in place of @samp{|}, would open a two-way
+pipeline for communication with a sub-process (via @code{getline} and
+@code{print} and @code{printf}).
+
+@item Function valued variables
+It would be possible to assign the name of a user-defined or built-in
+function to a regular @code{awk} variable, and then call the function
+indirectly, by using the regular variable. This would make it possible
+to write general purpose sorting and comparing routines, for example,
+by simply passing the name of one function into another.
+
+@item A built-in @code{stat} function
+The @code{stat} function would provide an easy-to-use hook to the
+@code{stat} system call so that @code{awk} programs could determine information
+about files.
+
+@item A built-in @code{ftw} function
+Combined with function valued variables and the @code{stat} function,
+@code{ftw} (file tree walk) would make it easy for an @code{awk} program
+to walk an entire file tree.
+@end ignore
+@end table
+
+This is a list of probable improvements that will make @code{gawk}
+perform better.
+
+@table @asis
+@item An Improved Version of @code{dfa}
+The @code{dfa} pattern matcher from GNU @code{grep} has some
+problems. Either a new version or a fixed one will deal with some
+important regexp matching issues.
+
+@item Use of GNU @code{malloc}
+The GNU version of @code{malloc} could potentially speed up @code{gawk},
+since it relies heavily on the use of dynamic memory allocation.
+
+@item Use of the @code{rx} regexp library
+The @code{rx} regular expression library could potentially speed up
+all regexp operations that require knowing the exact location of matches.
+This includes record termination, field and array splitting,
+and the @code{sub}, @code{gsub}, @code{gensub} and @code{match} functions.
+@end table
+
+@node Improvements, , Future Extensions, Notes
+@appendixsec Suggestions for Improvements
+
+Here are some projects that would-be @code{gawk} hackers might like to take
+on. They vary in size from a few days to a few weeks of programming,
+depending on which one you choose and how fast a programmer you are. Please
+send any improvements you write to the maintainers at the GNU project.
+@xref{Adding Code, , Adding New Features},
+for guidelines to follow when adding new features to @code{gawk}.
+@xref{Bugs, ,Reporting Problems and Bugs}, for information on
+contacting the maintainers.
+
+@enumerate
+@item
+Compilation of @code{awk} programs: @code{gawk} uses a Bison (YACC-like)
+parser to convert the script given it into a syntax tree; the syntax
+tree is then executed by a simple recursive evaluator. This method incurs
+a lot of overhead, since the recursive evaluator performs many procedure
+calls to do even the simplest things.
+
+It should be possible for @code{gawk} to convert the script's parse tree
+into a C program which the user would then compile, using the normal
+C compiler and a special @code{gawk} library to provide all the needed
+functions (regexps, fields, associative arrays, type coercion, and so
+on).
+
+An easier possibility might be for an intermediate phase of @code{gawk} to
+convert the parse tree into a linear byte code form like the one used
+in GNU Emacs Lisp. The recursive evaluator would then be replaced by
+a straight line byte code interpreter that would be intermediate in speed
+between running a compiled program and doing what @code{gawk} does
+now.
+
+@item
+The programs in the test suite could use documenting in this @value{DOCUMENT}.
+
+@item
+See the @file{FUTURES} file for more ideas. Contact us if you would
+seriously like to tackle any of the items listed there.
+@end enumerate
+
+@node Glossary, Copying, Notes, Top
+@appendix Glossary
+
+@table @asis
+@item Action
+A series of @code{awk} statements attached to a rule. If the rule's
+pattern matches an input record, @code{awk} executes the
+rule's action. Actions are always enclosed in curly braces.
+@xref{Action Overview, ,Overview of Actions}.
+
+@item Amazing @code{awk} Assembler
+Henry Spencer at the University of Toronto wrote a retargetable assembler
+completely as @code{awk} scripts. It is thousands of lines long, including
+machine descriptions for several eight-bit microcomputers.
+It is a good example of a
+program that would have been better written in another language.
+
+@item Amazingly Workable Formatter (@code{awf})
+Henry Spencer at the University of Toronto wrote a formatter that accepts
+a large subset of the @samp{nroff -ms} and @samp{nroff -man} formatting
+commands, using @code{awk} and @code{sh}.
+
+@item ANSI
+The American National Standards Institute. This organization produces
+many standards, among them the standards for the C and C++ programming
+languages.
+
+@item Assignment
+An @code{awk} expression that changes the value of some @code{awk}
+variable or data object. An object that you can assign to is called an
+@dfn{lvalue}. The assigned values are called @dfn{rvalues}.
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+@item @code{awk} Language
+The language in which @code{awk} programs are written.
+
+@item @code{awk} Program
+An @code{awk} program consists of a series of @dfn{patterns} and
+@dfn{actions}, collectively known as @dfn{rules}. For each input record
+given to the program, the program's rules are all processed in turn.
+@code{awk} programs may also contain function definitions.
+
+@item @code{awk} Script
+Another name for an @code{awk} program.
+
+@item Bash
+The GNU version of the standard shell (the Bourne-Again shell).
+See ``Bourne Shell.''
+
+@item BBS
+See ``Bulletin Board System.''
+
+@item Boolean Expression
+Named after the English mathematician Boole. See ``Logical Expression.''
+
+@item Bourne Shell
+The standard shell (@file{/bin/sh}) on Unix and Unix-like systems,
+originally written by Steven R.@: Bourne.
+Many shells (Bash, @code{ksh}, @code{pdksh}, @code{zsh}) are
+generally upwardly compatible with the Bourne shell.
+
+@item Built-in Function
+The @code{awk} language provides built-in functions that perform various
+numerical, time stamp related, and string computations. Examples are
+@code{sqrt} (for the square root of a number) and @code{substr} (for a
+substring of a string). @xref{Built-in, ,Built-in Functions}.
+
+@item Built-in Variable
+@code{ARGC}, @code{ARGIND}, @code{ARGV}, @code{CONVFMT}, @code{ENVIRON},
+@code{ERRNO}, @code{FIELDWIDTHS}, @code{FILENAME}, @code{FNR}, @code{FS},
+@code{IGNORECASE}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, @code{ORS},
+@code{RLENGTH}, @code{RSTART}, @code{RS}, @code{RT}, and @code{SUBSEP},
+are the variables that have special meaning to @code{awk}.
+Changing some of them affects @code{awk}'s running environment.
+Several of these variables are specific to @code{gawk}.
+@xref{Built-in Variables}.
+
+@item Braces
+See ``Curly Braces.''
+
+@item Bulletin Board System
+A computer system allowing users to log in and read and/or leave messages
+for other users of the system, much like leaving paper notes on a bulletin
+board.
+
+@item C
+The system programming language that most GNU software is written in. The
+@code{awk} programming language has C-like syntax, and this @value{DOCUMENT}
+points out similarities between @code{awk} and C when appropriate.
+
+@cindex ISO 8859-1
+@cindex ISO Latin-1
+@item Character Set
+The set of numeric codes used by a computer system to represent the
+characters (letters, numbers, punctuation, etc.) of a particular country
+or place. The most common character set in use today is ASCII (American
+Standard Code for Information Interchange). Many European
+countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1).
+
+@item CHEM
+A preprocessor for @code{pic} that reads descriptions of molecules
+and produces @code{pic} input for drawing them. It was written in @code{awk}
+by Brian Kernighan and Jon Bentley, and is available from
+@code{@w{netlib@@research.att.com}}.
+
+@item Compound Statement
+A series of @code{awk} statements, enclosed in curly braces. Compound
+statements may be nested.
+@xref{Statements, ,Control Statements in Actions}.
+
+@item Concatenation
+Concatenating two strings means sticking them together, one after another,
+giving a new string. For example, the string @samp{foo} concatenated with
+the string @samp{bar} gives the string @samp{foobar}.
+@xref{Concatenation, ,String Concatenation}.
+
+@item Conditional Expression
+An expression using the @samp{?:} ternary operator, such as
+@samp{@var{expr1} ? @var{expr2} : @var{expr3}}. The expression
+@var{expr1} is evaluated; if the result is true, the value of the whole
+expression is the value of @var{expr2}, otherwise the value is
+@var{expr3}. In either case, only one of @var{expr2} and @var{expr3}
+is evaluated. @xref{Conditional Exp, ,Conditional Expressions}.
+
+@item Comparison Expression
+A relation that is either true or false, such as @samp{(a < b)}.
+Comparison expressions are used in @code{if}, @code{while}, @code{do},
+and @code{for}
+statements, and in patterns to select which input records to process.
+@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions}.
+
+@item Curly Braces
+The characters @samp{@{} and @samp{@}}. Curly braces are used in
+@code{awk} for delimiting actions, compound statements, and function
+bodies.
+
+@item Dark Corner
+An area in the language where specifications often were (or still
+are) not clear, leading to unexpected or undesirable behavior.
+Such areas are marked in this @value{DOCUMENT} with ``(d.c.)'' in the
+text, and are indexed under the heading ``dark corner.''
+
+@item Data Objects
+These are numbers and strings of characters. Numbers are converted into
+strings and vice versa, as needed.
+@xref{Conversion, ,Conversion of Strings and Numbers}.
+
+@item Double Precision
+An internal representation of numbers that can have fractional parts.
+Double precision numbers keep track of more digits than do single precision
+numbers, but operations on them are more expensive. This is the way
+@code{awk} stores numeric values. It is the C type @code{double}.
+
+@item Dynamic Regular Expression
+A dynamic regular expression is a regular expression written as an
+ordinary expression. It could be a string constant, such as
+@code{"foo"}, but it may also be an expression whose value can vary.
+@xref{Computed Regexps, , Using Dynamic Regexps}.
+
+@item Environment
+A collection of strings, of the form @var{name@code{=}val}, that each
+program has available to it. Users generally place values into the
+environment in order to provide information to various programs. Typical
+examples are the environment variables @code{HOME} and @code{PATH}.
+
+@item Empty String
+See ``Null String.''
+
+@item Escape Sequences
+A special sequence of characters used for describing non-printing
+characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
+ESC (escape) character. @xref{Escape Sequences}.
+
+@item Field
+When @code{awk} reads an input record, it splits the record into pieces
+separated by whitespace (or by a separator regexp which you can
+change by setting the built-in variable @code{FS}). Such pieces are
+called fields. If the pieces are of fixed length, you can use the built-in
+variable @code{FIELDWIDTHS} to describe their lengths.
+@xref{Field Separators, ,Specifying How Fields are Separated},
+and also see
+@ref{Constant Size, , Reading Fixed-width Data}.
+
+@item Floating Point Number
+Often referred to in mathematical terms as a ``rational'' number, this is
+just a number that can have a fractional part.
+See ``Double Precision'' and ``Single Precision.''
+
+@item Format
+Format strings are used to control the appearance of output in the
+@code{printf} statement. Also, data conversions from numbers to strings
+are controlled by the format string contained in the built-in variable
+@code{CONVFMT}. @xref{Control Letters, ,Format-Control Letters}.
+
+@item Function
+A specialized group of statements used to encapsulate general
+or program-specific tasks. @code{awk} has a number of built-in
+functions, and also allows you to define your own.
+@xref{Built-in, ,Built-in Functions},
+and @ref{User-defined, ,User-defined Functions}.
+
+@item FSF
+See ``Free Software Foundation.''
+
+@item Free Software Foundation
+A non-profit organization dedicated
+to the production and distribution of freely distributable software.
+It was founded by Richard M.@: Stallman, the author of the original
+Emacs editor. GNU Emacs is the most widely used version of Emacs today.
+
+@item @code{gawk}
+The GNU implementation of @code{awk}.
+
+@item General Public License
+This document describes the terms under which @code{gawk} and its source
+code may be distributed. (@xref{Copying, ,GNU GENERAL PUBLIC LICENSE}.)
+
+@item GNU
+``GNU's not Unix''. An on-going project of the Free Software Foundation
+to create a complete, freely distributable, POSIX-compliant computing
+environment.
+
+@item GPL
+See ``General Public License.''
+
+@item Hexadecimal
+Base 16 notation, where the digits are @code{0}-@code{9} and
+@code{A}-@code{F}, with @samp{A}
+representing 10, @samp{B} representing 11, and so on up to @samp{F} for 15.
+Hexadecimal numbers are written in C using a leading @samp{0x},
+to indicate their base. Thus, @code{0x12} is 18 (one times 16 plus 2).
+
+@item I/O
+Abbreviation for ``Input/Output,'' the act of moving data into and/or
+out of a running program.
+
+@item Input Record
+A single chunk of data read in by @code{awk}. Usually, an @code{awk} input
+record consists of one line of text.
+@xref{Records, ,How Input is Split into Records}.
+
+@item Integer
+A whole number, i.e.@: a number that does not have a fractional part.
+
+@item Keyword
+In the @code{awk} language, a keyword is a word that has special
+meaning. Keywords are reserved and may not be used as variable names.
+
+@code{gawk}'s keywords are:
+@code{BEGIN},
+@code{END},
+@code{if},
+@code{else},
+@code{while},
+@code{do@dots{}while},
+@code{for},
+@code{for@dots{}in},
+@code{break},
+@code{continue},
+@code{delete},
+@code{next},
+@code{nextfile},
+@code{function},
+@code{func},
+and @code{exit}.
+
+@item Logical Expression
+An expression using the operators for logic, AND, OR, and NOT, written
+@samp{&&}, @samp{||}, and @samp{!} in @code{awk}. Often called Boolean
+expressions, after the mathematician who pioneered this kind of
+mathematical logic.
+
+@item Lvalue
+An expression that can appear on the left side of an assignment
+operator. In most languages, lvalues can be variables or array
+elements. In @code{awk}, a field designator can also be used as an
+lvalue.
+
+@item Null String
+A string with no characters in it. It is represented explicitly in
+@code{awk} programs by placing two double-quote characters next to
+each other (@code{""}). It can appear in input data by having two successive
+occurrences of the field separator appear next to each other.
+
+@item Number
+A numeric valued data object. The @code{gawk} implementation uses double
+precision floating point to represent numbers.
+Very old @code{awk} implementations use single precision floating
+point.
+
+@item Octal
+Base-eight notation, where the digits are @code{0}-@code{7}.
+Octal numbers are written in C using a leading @samp{0},
+to indicate their base. Thus, @code{013} is 11 (one times 8 plus 3).
+
+@item Pattern
+Patterns tell @code{awk} which input records are interesting to which
+rules.
+
+A pattern is an arbitrary conditional expression against which input is
+tested. If the condition is satisfied, the pattern is said to @dfn{match}
+the input record. A typical pattern might compare the input record against
+a regular expression. @xref{Pattern Overview, ,Pattern Elements}.
+
+@item POSIX
+The name for a series of standards being developed by the IEEE
+that specify a Portable Operating System interface. The ``IX'' denotes
+the Unix heritage of these standards. The main standard of interest for
+@code{awk} users is
+@cite{IEEE Standard for Information Technology, Standard 1003.2-1992,
+Portable Operating System Interface (POSIX) Part 2: Shell and Utilities}.
+Informally, this standard is often referred to as simply ``P1003.2.''
+
+@item Private
+Variables and/or functions that are meant for use exclusively by library
+functions, and not for the main @code{awk} program. Special care must be
+taken when naming such variables and functions.
+@xref{Library Names, , Naming Library Function Global Variables}.
+
+@item Range (of input lines)
+A sequence of consecutive lines from the input file. A pattern
+can specify ranges of input lines for @code{awk} to process, or it can
+specify single lines. @xref{Pattern Overview, ,Pattern Elements}.
+
+@item Recursion
+When a function calls itself, either directly or indirectly.
+If this isn't clear, refer to the entry for ``recursion.''
+
+@item Redirection
+Redirection means performing input from other than the standard input
+stream, or output to other than the standard output stream.
+
+You can redirect the output of the @code{print} and @code{printf} statements
+to a file or a system command, using the @samp{>}, @samp{>>}, and @samp{|}
+operators. You can redirect input to the @code{getline} statement using
+the @samp{<} and @samp{|} operators.
+@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}},
+and @ref{Getline, ,Explicit Input with @code{getline}}.
+
+@item Regexp
+Short for @dfn{regular expression}. A regexp is a pattern that denotes a
+set of strings, possibly an infinite set. For example, the regexp
+@samp{R.*xp} matches any string starting with the letter @samp{R}
+and ending with the letters @samp{xp}. In @code{awk}, regexps are
+used in patterns and in conditional expressions. Regexps may contain
+escape sequences. @xref{Regexp, ,Regular Expressions}.
+
+@item Regular Expression
+See ``Regexp.''
+
+@item Regular Expression Constant
+A regular expression constant is a regular expression written within
+slashes, such as @code{/foo/}. This regular expression is chosen
+when you write the @code{awk} program, and cannot be changed during
+its execution. @xref{Regexp Usage, ,How to Use Regular Expressions}.
+
+@item Rule
+A segment of an @code{awk} program that specifies how to process single
+input records. A rule consists of a @dfn{pattern} and an @dfn{action}.
+@code{awk} reads an input record; then, for each rule, if the input record
+satisfies the rule's pattern, @code{awk} executes the rule's action.
+Otherwise, the rule does nothing for that input record.
+
+@item Rvalue
+A value that can appear on the right side of an assignment operator.
+In @code{awk}, essentially every expression has a value. These values
+are rvalues.
+
+@item @code{sed}
+See ``Stream Editor.''
+
+@item Short-Circuit
+The nature of the @code{awk} logical operators @samp{&&} and @samp{||}.
+If the value of the entire expression can be deduced from evaluating just
+the left-hand side of these operators, the right-hand side will not
+be evaluated
+(@pxref{Boolean Ops, ,Boolean Expressions}).
+
+@item Side Effect
+A side effect occurs when an expression has an effect aside from merely
+producing a value. Assignment expressions, increment and decrement
+expressions and function calls have side effects.
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+@item Single Precision
+An internal representation of numbers that can have fractional parts.
+Single precision numbers keep track of fewer digits than do double precision
+numbers, but operations on them are less expensive in terms of CPU time.
+This is the type used by some very old versions of @code{awk} to store
+numeric values. It is the C type @code{float}.
+
+@item Space
+The character generated by hitting the space bar on the keyboard.
+
+@item Special File
+A file name interpreted internally by @code{gawk}, instead of being handed
+directly to the underlying operating system. For example, @file{/dev/stderr}.
+@xref{Special Files, ,Special File Names in @code{gawk}}.
+
+@item Stream Editor
+A program that reads records from an input stream and processes them one
+or more at a time. This is in contrast with batch programs, which may
+expect to read their input files in entirety before starting to do
+anything, and with interactive programs, which require input from the
+user.
+
+@item String
+A datum consisting of a sequence of characters, such as @samp{I am a
+string}. Constant strings are written with double-quotes in the
+@code{awk} language, and may contain escape sequences.
+@xref{Escape Sequences}.
+
+@item Tab
+The character generated by hitting the @kbd{TAB} key on the keyboard.
+It usually expands to up to eight spaces upon output.
+
+@item Unix
+A computer operating system originally developed in the early 1970's at
+AT&T Bell Laboratories. It initially became popular in universities around
+the world, and later moved into commercial environments as a software
+development system and network server system. There are many commercial
+versions of Unix, as well as several work-alike systems whose source code
+is freely available (such as Linux, NetBSD, and FreeBSD).
+
+@item Whitespace
+A sequence of space, tab, or newline characters occurring inside an input
+record or a string.
+@end table
+
+@node Copying, Index, Glossary, Top
+@unnumbered GNU GENERAL PUBLIC LICENSE
+@center Version 2, June 1991
+
+@display
+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+59 Temple Place --- Suite 330, Boston, MA 02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@c fakenode --- for prepinfo
+@unnumberedsec Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@c fakenode --- for prepinfo
+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end ifinfo
+
+@enumerate 0
+@item
+This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The ``Program'', below,
+refers to any such program or work, and a ``work based on the Program''
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term ``modification''.) Each licensee is addressed as ``you''.
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+@item
+You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+@item
+You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+@enumerate a
+@item
+You must cause the modified files to carry prominent notices
+stating that you changed the files and the date of any change.
+
+@item
+You must cause any work that you distribute or publish, that in
+whole or in part contains or is derived from the Program or any
+part thereof, to be licensed as a whole at no charge to all third
+parties under the terms of this License.
+
+@item
+If the modified program normally reads commands interactively
+when run, you must cause it, when started running for such
+interactive use in the most ordinary way, to print or display an
+announcement including an appropriate copyright notice and a
+notice that there is no warranty (or else, saying that you provide
+a warranty) and that users may redistribute the program under
+these conditions, and telling the user how to view a copy of this
+License. (Exception: if the Program itself is interactive but
+does not normally print such an announcement, your work based on
+the Program is not required to print an announcement.)
+@end enumerate
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+@item
+You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+@enumerate a
+@item
+Accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of Sections
+1 and 2 above on a medium customarily used for software interchange; or,
+
+@item
+Accompany it with a written offer, valid for at least three
+years, to give any third party, for a charge no more than your
+cost of physically performing source distribution, a complete
+machine-readable copy of the corresponding source code, to be
+distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+@item
+Accompany it with the information you received as to the offer
+to distribute corresponding source code. (This alternative is
+allowed only for non-commercial distribution and only if you
+received the program in object code or executable form with such
+an offer, in accord with Subsection b above.)
+@end enumerate
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+@item
+You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+@item
+You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+@item
+If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+@item
+If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@c fakenode --- for prepinfo
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW@. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE@. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU@. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@c fakenode --- for prepinfo
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@c fakenode --- for prepinfo
+@unnumberedsec How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and an idea of what it does.}
+Copyright (C) 19@var{yy} @var{name of author}
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE@. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place --- Suite 330, Boston, MA 02111-1307, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
+type `show w'. This is free software, and you are welcome
+to redistribute it under certain conditions; type `show c'
+for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than @samp{show w} and
+@samp{show c}; they could even be mouse-clicks or menu items---whatever
+suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary. Here is a sample; alter the names:
+
+@smallexample
+@group
+Yoyodyne, Inc., hereby disclaims all copyright
+interest in the program `Gnomovision'
+(which makes passes at compilers) written
+by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end group
+@end smallexample
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+@node Index, , Copying, Top
+@unnumbered Index
+@printindex cp
+
+@summarycontents
+@contents
+@bye
+
+Unresolved Issues:
+------------------
+1. From ADR.
+
+ Robert J. Chassell points out that awk programs should have some indication
+ of how to use them. It would be useful to perhaps have a "programming
+ style" section of the manual that would include this and other tips.
+
+2. The default AWKPATH search path should be configurable via `configure'.
+ The default, and how `configure' changes it, need to be documented.
+
+Consistency issues:
+ /.../ regexps are in @code, not @samp
+ ".." strings are in @code, not @samp
+ no @print before @dots
+ values of expressions in the text (@code{x} has the value 15),
+ should be in roman, not @code
+ Use tab and not TAB
+ Use ESC and not ESCAPE
+ Use space and not blank to describe the space bar's character
+ The term "blank" is thus basically reserved for "blank lines" etc.
+ The `(d.c.)' should appear inside the closing `.' of a sentence
+ It should come before (pxref{...})
+ " " should have an @w{} around it
+ Use "non-" everywhere
+ Use @code{ftp} when talking about anonymous ftp
+ Use upper-case and lower-case, not "upper case" and "lower case"
+ Use alphanumeric, not alpha-numeric
+ Use --foo, not -Wfoo when describing long options
+ Use findex for all programs and functions in the example chapters
+ Use "Bell Laboratories", but not "Bell Labs".
+ Use "behavior" instead of "behaviour".
+ Use "zeros" instead of "zeroes".
+ Use "Input/Output", not "input/output". Also "I/O", not "i/o".
+ Use @code{do}, and not @code{do}-@code{while}, except where
+ actually discussing the do-while.
+ The words "a", "and", "as", "between", "for", "from", "in", "of",
+ "on", "that", "the", "to", "with", and "without",
+ should not be capitalized in @chapter, @section etc.
+ "Into" and "How" should.
+ Search for @dfn; make sure important items are also indexed.
+ "e.g." should always be followed by a comma.
+ "i.e." should never be followed by a comma, and should be followed
+ by `@:'.
+ The numbers zero through ten should be spelled out, except when
+ talking about file descriptor numbers. For values greater than ten
+ or less than zero, it's ok to use digits.
+ In tables, put command line options in @code, while in the text,
+ put them in @samp.
+ When using @strong, use "Note:" or "Caution:" with colons and
+ not exclamation points. Do not surround the paragraphs
+ with @quotation ... @end quotation.
+
+Date: Wed, 13 Apr 94 15:20:52 -0400
+From: rms@gnu.ai.mit.edu (Richard Stallman)
+To: gnu-prog@gnu.ai.mit.edu
+Subject: A reminder: no pathnames in GNU
+
+It's a GNU convention to use the term "file name" for the name of a
+file, never "pathname". We use the term "path" for search paths,
+which are lists of file names. Using it for a single file name as
+well is potentially confusing to users.
+
+So please check any documentation you maintain, if you think you might
+have used "pathname".
+
+Note that "file name" should be two words when it appears as ordinary
+text. It's ok as one word when it's a metasyntactic variable, though.
+
+Suggestions:
+------------
+Enhance FIELDWIDTHS with some way to indicate "the rest of the record".
+E.g., a length of 0 or -1 or something. Maybe "n"?
+
+Make FIELDWIDTHS be an array?
+
+What if FIELDWIDTHS has invalid values in it?
diff --git a/contrib/awk/eval.c b/contrib/awk/eval.c
new file mode 100644
index 0000000..aa2e881
--- /dev/null
+++ b/contrib/awk/eval.c
@@ -0,0 +1,1720 @@
+/*
+ * eval.c - gawk parse tree interpreter
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+
+#include <assert.h>
+
+extern double pow P((double x, double y)); /* P(()): presumably awk.h's prototype wrapper -- confirm */
+extern double modf P((double x, double *yp));
+extern double fmod P((double x, double y));
+
+static int eval_condition P((NODE *tree));
+static NODE *op_assign P((NODE *tree));
+static NODE *func_call P((NODE *name, NODE *arg_list));
+static NODE *match_op P((NODE *tree));
+static void push_args P((int count, NODE *arglist, NODE **oldstack, char *func_name));
+static void pop_fcall_stack P((void));
+static void pop_fcall P((void));
+static int in_function P((void));
+char *nodetype2str P((NODETYPE type));
+char *flags2str P((int flagval));
+
+#if __GNUC__ < 2
+NODE *_t; /* used as a temporary in macros */
+#endif
+#ifdef MSDOS
+double _msc51bug; /* to get around a bug in MSC 5.1 */
+#endif
+NODE *ret_node; /* value of the last `return'; set in interpret(), case Node_K_return */
+int OFSlen; /* NOTE(review): these four are not set in this part of the file; */
+int ORSlen; /* presumably cached data for OFS, ORS, OFMT and CONVFMT -- confirm */
+int OFMTidx;
+int CONVFMTidx;
+
+/* Macros and variables to save and restore function and loop bindings */
+/*
+ * PUSH saves jmp_buf x in stack and bumps val; RESTORE copies it back and drops
+ * val, so val != 0 lets return/continue/break-out-of-context be diagnosed
+ */
+#define PUSH_BINDING(stack, x, val) (memcpy((char *)(stack), (char *)(x), sizeof(jmp_buf)), val++)
+#define RESTORE_BINDING(stack, x, val) (memcpy((char *)(x), (char *)(stack), sizeof(jmp_buf)), val--)
+
+static jmp_buf loop_tag; /* always the current binding */
+static int loop_tag_valid = FALSE; /* nonzero when loop_tag valid */
+static int func_tag_valid = FALSE; /* NOTE(review): presumably nonzero when func_tag valid -- confirm */
+static jmp_buf func_tag; /* target of the longjmp taken by `return' in interpret() */
+extern int exiting, exit_val; /* both set by the `exit' statement in interpret() */
+
+/*
+ * This table is used by the regexp routines to do case independent
+ * matching. Basically, every ascii character maps to itself, except
+ * uppercase letters map to lower case ones. This table has 256
+ * entries, for ISO 8859-1. Note also that if the system this
+ * is compiled on doesn't use 7-bit ascii, casetable[] should not be
+ * defined to the linker, so gawk should not load.
+ *
+ * Do NOT make this array static, it is used in several spots, not
+ * just in this file.
+ */
+#if 'a' == 97 /* it's ascii */
+char casetable[] = { /* entry N is the lower-case form of character code N */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ /* ' ' '!' '"' '#' '$' '%' '&' ''' */
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ /* '(' ')' '*' '+' ',' '-' '.' '/' */
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ /* '0' '1' '2' '3' '4' '5' '6' '7' */
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ /* '8' '9' ':' ';' '<' '=' '>' '?' */
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ /* 'x' 'y' 'z' '{' '|' '}' '~' */
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+#ifndef USE_PURE_ASCII /* upper half: also fold the ISO 8859-1 capitals at 0300-0336 */
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', /* 0327 (multiplication sign) maps to itself */
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', /* 0337 (sharp s) maps to itself */
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+#else /* USE_PURE_ASCII: codes 0200-0377 all map to themselves */
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+#endif
+};
+#else /* not ascii: the bogus #include below evidently stops the build on purpose */
+#include "You lose. You will need a translation table for your character set."
+#endif
+
+/*
+ * This table maps node types to strings for debugging; nodetype2str()
+ * indexes it by the NODETYPE value, so order matters. KEEP IN SYNC WITH awk.h!!!!
+ */
+static char *nodetypes[] = {
+ "Node_illegal",
+ "Node_times",
+ "Node_quotient",
+ "Node_mod",
+ "Node_plus",
+ "Node_minus",
+ "Node_cond_pair",
+ "Node_subscript",
+ "Node_concat",
+ "Node_exp",
+ "Node_preincrement",
+ "Node_predecrement",
+ "Node_postincrement",
+ "Node_postdecrement",
+ "Node_unary_minus",
+ "Node_field_spec",
+ "Node_assign",
+ "Node_assign_times",
+ "Node_assign_quotient",
+ "Node_assign_mod",
+ "Node_assign_plus",
+ "Node_assign_minus",
+ "Node_assign_exp",
+ "Node_and",
+ "Node_or",
+ "Node_equal",
+ "Node_notequal",
+ "Node_less",
+ "Node_greater",
+ "Node_leq",
+ "Node_geq",
+ "Node_match",
+ "Node_nomatch",
+ "Node_not",
+ "Node_rule_list",
+ "Node_rule_node",
+ "Node_statement_list",
+ "Node_if_branches",
+ "Node_expression_list",
+ "Node_param_list",
+ "Node_K_if",
+ "Node_K_while",
+ "Node_K_for",
+ "Node_K_arrayfor",
+ "Node_K_break",
+ "Node_K_continue",
+ "Node_K_print",
+ "Node_K_printf",
+ "Node_K_next",
+ "Node_K_exit",
+ "Node_K_do",
+ "Node_K_return",
+ "Node_K_delete",
+ "Node_K_getline",
+ "Node_K_function",
+ "Node_K_nextfile",
+ "Node_redirect_output",
+ "Node_redirect_append",
+ "Node_redirect_pipe",
+ "Node_redirect_pipein",
+ "Node_redirect_input",
+ "Node_var",
+ "Node_var_array",
+ "Node_val",
+ "Node_builtin",
+ "Node_line_range",
+ "Node_in_array",
+ "Node_func",
+ "Node_func_call",
+ "Node_cond_exp",
+ "Node_regex",
+ "Node_hashnode",
+ "Node_ahash",
+ "Node_NF",
+ "Node_NR",
+ "Node_FNR",
+ "Node_FS",
+ "Node_RS",
+ "Node_FIELDWIDTHS",
+ "Node_IGNORECASE",
+ "Node_OFS",
+ "Node_ORS",
+ "Node_OFMT",
+ "Node_CONVFMT",
+ "Node_final",
+ NULL /* terminator */
+};
+
+char *
+nodetype2str(type)
+NODETYPE type;
+{
+	/* debugging name of a node type; the result is static text, do not free it */
+	static char unknown[40];	/* fallback text, reused by every out-of-range call */
+	int idx = (int) type;
+	int known = (type >= Node_illegal && type <= Node_final);
+	if (! known)
+		sprintf(unknown, "unknown nodetype %d", idx);
+	return known ? nodetypes[idx] : unknown;
+}
+
+/* flags2str --- make a flags value readable: names of the set flags joined by
+ * '|', in a static buffer that every call overwrites ("" if none is known) */
+char *
+flags2str(flagval)
+int flagval;
+{
+ static char buffer[BUFSIZ];
+ char *sp;
+
+ sp = buffer;
+ *sp = '\0'; /* else a value with no known flag returns the previous call's text */
+ if (flagval & MALLOC) {
+ strcpy(sp, "MALLOC");
+ sp += strlen(sp);
+ }
+ if (flagval & TEMP) {
+ if (sp != buffer)
+ *sp++ = '|'; /* not the first name written: separate it from the last */
+ strcpy(sp, "TEMP");
+ sp += strlen(sp);
+ }
+ if (flagval & PERM) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "PERM");
+ sp += strlen(sp);
+ }
+ if (flagval & STRING) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "STRING");
+ sp += strlen(sp);
+ }
+ if (flagval & STR) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "STR");
+ sp += strlen(sp);
+ }
+ if (flagval & NUM) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "NUM");
+ sp += strlen(sp);
+ }
+ if (flagval & NUMBER) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "NUMBER");
+ sp += strlen(sp);
+ }
+ if (flagval & MAYBE_NUM) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "MAYBE_NUM");
+ sp += strlen(sp);
+ }
+ if (flagval & ARRAYMAXED) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "ARRAYMAXED");
+ sp += strlen(sp);
+ }
+ if (flagval & SCALAR) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "SCALAR");
+ sp += strlen(sp);
+ }
+ if (flagval & FUNC) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "FUNC");
+ sp += strlen(sp);
+ }
+ if (flagval & FIELD) {
+ if (sp != buffer)
+ *sp++ = '|';
+ strcpy(sp, "FIELD");
+ sp += strlen(sp);
+ }
+
+ return buffer;
+}
+
+/*
+ * interpret:
+ * Tree is a bunch of rules to run. Returns zero if it hit an exit()
+ * statement
+ */
+int
+interpret(tree)
+register NODE *volatile tree;
+{
+ jmp_buf volatile loop_tag_stack; /* shallow binding stack for loop_tag */
+ static jmp_buf rule_tag; /* tag the rule currently being run, for NEXT
+ * and EXIT statements. It is static because
+ * there are no nested rules */
+ register NODE *volatile t = NULL; /* temporary */
+ NODE **volatile lhs; /* lhs == Left Hand Side for assigns, etc */
+ NODE *volatile stable_tree;
+ int volatile traverse = TRUE; /* True => loop thru tree (Node_rule_list) */
+
+ /* avoid false source indications */
+ source = NULL;
+ sourceline = 0;
+
+ if (tree == NULL)
+ return 1;
+ sourceline = tree->source_line;
+ source = tree->source_file;
+ switch (tree->type) {
+ case Node_rule_node:
+ traverse = FALSE; /* False => one for-loop iteration only */
+ /* FALL THROUGH */
+ case Node_rule_list:
+ for (t = tree; t != NULL; t = t->rnode) {
+ if (traverse)
+ tree = t->lnode;
+ sourceline = tree->source_line;
+ source = tree->source_file;
+ switch (setjmp(rule_tag)) {
+ case 0: /* normal non-jump */
+ /* test pattern, if any */
+ if (tree->lnode == NULL ||
+ eval_condition(tree->lnode))
+ (void) interpret(tree->rnode);
+ break;
+ case TAG_CONTINUE: /* NEXT statement */
+ return 1;
+ case TAG_BREAK:
+ return 0;
+ default:
+ cant_happen();
+ }
+ if (! traverse) /* case Node_rule_node */
+ break; /* don't loop */
+ }
+ break;
+
+ case Node_statement_list:
+ for (t = tree; t != NULL; t = t->rnode)
+ (void) interpret(t->lnode);
+ break;
+
+ case Node_K_if:
+ if (eval_condition(tree->lnode))
+ (void) interpret(tree->rnode->lnode);
+ else
+ (void) interpret(tree->rnode->rnode);
+ break;
+
+ case Node_K_while:
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+
+ stable_tree = tree;
+ while (eval_condition(stable_tree->lnode)) {
+ switch (setjmp(loop_tag)) {
+ case 0: /* normal non-jump */
+ (void) interpret(stable_tree->rnode);
+ break;
+ case TAG_CONTINUE: /* continue statement */
+ break;
+ case TAG_BREAK: /* break statement */
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1;
+ default:
+ cant_happen();
+ }
+ }
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+
+ case Node_K_do:
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ stable_tree = tree;
+ do {
+ switch (setjmp(loop_tag)) {
+ case 0: /* normal non-jump */
+ (void) interpret(stable_tree->rnode);
+ break;
+ case TAG_CONTINUE: /* continue statement */
+ break;
+ case TAG_BREAK: /* break statement */
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1;
+ default:
+ cant_happen();
+ }
+ } while (eval_condition(stable_tree->lnode));
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+
+ case Node_K_for:
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ (void) interpret(tree->forloop->init);
+ stable_tree = tree;
+ while (eval_condition(stable_tree->forloop->cond)) {
+ switch (setjmp(loop_tag)) {
+ case 0: /* normal non-jump */
+ (void) interpret(stable_tree->lnode);
+ /* fall through */
+ case TAG_CONTINUE: /* continue statement */
+ (void) interpret(stable_tree->forloop->incr);
+ break;
+ case TAG_BREAK: /* break statement */
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1;
+ default:
+ cant_happen();
+ }
+ }
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+
+ case Node_K_arrayfor:
+ {
+ volatile struct search l; /* For array_for */
+ Func_ptr after_assign = NULL;
+
+#define hakvar forloop->init
+#define arrvar forloop->incr
+ PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ lhs = get_lhs(tree->hakvar, &after_assign);
+ t = tree->arrvar;
+ if (t->type == Node_param_list)
+ t = stack_ptr[t->param_cnt];
+ stable_tree = tree;
+ if ((t->flags & SCALAR) != 0)
+ fatal("attempt to use scalar as array");
+ for (assoc_scan(t, (struct search *)&l);
+ l.retval;
+ assoc_next((struct search *)&l)) {
+ unref(*((NODE **) lhs));
+ *lhs = dupnode(l.retval);
+ if (after_assign)
+ (*after_assign)();
+ switch (setjmp(loop_tag)) {
+ case 0:
+ (void) interpret(stable_tree->lnode);
+ case TAG_CONTINUE:
+ break;
+
+ case TAG_BREAK:
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ return 1;
+ default:
+ cant_happen();
+ }
+ }
+ RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ break;
+ }
+
+ case Node_K_break:
+ if (! loop_tag_valid) {
+ /*
+ * Old AT&T nawk treats break outside of loops like
+ * next. New ones catch it at parse time. Allow it if
+ * do_traditional is on, and complain if lint.
+ */
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warning("use of `break' outside a loop is not portable");
+ warned = TRUE;
+ }
+ if (! do_traditional || do_posix)
+ fatal("use of `break' outside a loop is not allowed");
+ if (in_function())
+ pop_fcall_stack();
+ longjmp(rule_tag, TAG_CONTINUE);
+ } else
+ longjmp(loop_tag, TAG_BREAK);
+ break;
+
+ case Node_K_continue:
+ if (! loop_tag_valid) {
+ /*
+ * Old AT&T nawk treats continue outside of loops like
+ * next. New ones catch it at parse time. Allow it if
+ * do_traditional is on, and complain if lint.
+ */
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warning("use of `continue' outside a loop is not portable");
+ warned = TRUE;
+ }
+ if (! do_traditional || do_posix)
+ fatal("use of `continue' outside a loop is not allowed");
+ if (in_function())
+ pop_fcall_stack();
+ longjmp(rule_tag, TAG_CONTINUE);
+ } else
+ longjmp(loop_tag, TAG_CONTINUE);
+ break;
+
+ case Node_K_print:
+ do_print(tree);
+ break;
+
+ case Node_K_printf:
+ do_printf(tree);
+ break;
+
+ case Node_K_delete:
+ do_delete(tree->lnode, tree->rnode);
+ break;
+
+ case Node_K_next:
+ if (in_function())
+ pop_fcall_stack();
+ longjmp(rule_tag, TAG_CONTINUE);
+ break;
+
+ case Node_K_nextfile:
+ if (in_function())
+ pop_fcall_stack();
+ do_nextfile();
+ break;
+
+ case Node_K_exit:
+ /*
+ * In A,K,&W, p. 49, it says that an exit statement "...
+ * causes the program to behave as if the end of input had
+ * occurred; no more input is read, and the END actions, if
+ * any are executed." This implies that the rest of the rules
+ * are not done. So we immediately break out of the main loop.
+ */
+ exiting = TRUE;
+ if (tree->lnode != NULL) {
+ t = tree_eval(tree->lnode);
+ exit_val = (int) force_number(t);
+ free_temp(t);
+ }
+ longjmp(rule_tag, TAG_BREAK);
+ break;
+
+ case Node_K_return:
+ t = tree_eval(tree->lnode);
+ ret_node = dupnode(t);
+ free_temp(t);
+ longjmp(func_tag, TAG_RETURN);
+ break;
+
+ default:
+ /*
+ * Appears to be an expression statement. Throw away the
+ * value.
+ */
+ if (do_lint && tree->type == Node_var)
+ warning("statement has no effect");
+ t = tree_eval(tree);
+ free_temp(t);
+ break;
+ }
+ return 1;
+}
+
+/*
+ * r_tree_eval --- evaluate a subtree
+ *
+ * tree is the expression node to evaluate.  iscond is consulted in one
+ * place only: together with do_lint it triggers the "assignment used in
+ * conditional context" warning.  The return value is the NODE holding
+ * the result of the expression.
+ *
+ * The function works in three stages: node types that can be answered
+ * directly are handled in the first switch; the remaining (binary) types
+ * have both operands evaluated and are then treated as comparisons
+ * (second switch) or as arithmetic (third switch).
+ */
+
+NODE *
+r_tree_eval(tree, iscond)
+register NODE *tree;
+int iscond;
+{
+ register NODE *r, *t1, *t2; /* return value & temporary subtrees */
+ register NODE **lhs;
+ register int di;
+ AWKNUM x, x1, x2;
+ long lx;
+#ifdef _CRAY
+ long lx2;
+#endif
+ char namebuf[100];
+
+ /*
+ * NOTE(review): these shortcuts exist only in DEBUG builds.  Without
+ * DEBUG a NULL tree is dereferenced just below, so presumably the
+ * caller (the tree_eval wrapper) filters out NULL trees -- confirm.
+ */
+#ifdef DEBUG
+ if (tree == NULL)
+ return Nnull_string;
+ else if (tree->type == Node_val) {
+ if (tree->stref <= 0)
+ cant_happen();
+ return tree;
+ } else if (tree->type == Node_var) {
+ if (tree->var_value->stref <= 0)
+ cant_happen();
+ return tree->var_value;
+ }
+#endif
+
+ /* a function parameter: switch to the node in the current call's stack slot */
+ if (tree->type == Node_param_list) {
+ int paramnum = tree->param_cnt + 1;
+
+ tree = stack_ptr[tree->param_cnt];
+ if (tree == NULL)
+ return Nnull_string;
+ /*
+ * Give the parameter a printable name for the array-in-scalar-context
+ * message below.
+ * NOTE(review): namebuf is an automatic array, so vname points into
+ * this stack frame after we return -- confirm nothing reads it later.
+ */
+ sprintf(namebuf, "parameter #%d", paramnum);
+ tree->vname = namebuf;
+ }
+
+ /* stage 1: node types that need no generic operand evaluation */
+ switch (tree->type) {
+ case Node_var:
+ return tree->var_value;
+
+ /* && and || rely on C short-circuiting, so the right side may not be evaluated */
+ case Node_and:
+ return tmp_number((AWKNUM) (eval_condition(tree->lnode)
+ && eval_condition(tree->rnode)));
+
+ case Node_or:
+ return tmp_number((AWKNUM) (eval_condition(tree->lnode)
+ || eval_condition(tree->rnode)));
+
+ case Node_not:
+ return tmp_number((AWKNUM) ! eval_condition(tree->lnode));
+
+ /* Builtins */
+ case Node_builtin:
+ return (*tree->proc)(tree->subnode);
+
+ case Node_K_getline:
+ return (do_getline(tree));
+
+ case Node_in_array:
+ return tmp_number((AWKNUM) in_array(tree->lnode, tree->rnode));
+
+ case Node_func_call:
+ return func_call(tree->rnode, tree->lnode);
+
+ /* unary operations */
+ /* all of these are read through get_lhs(); no assignment hook is requested */
+ case Node_NR:
+ case Node_FNR:
+ case Node_NF:
+ case Node_FIELDWIDTHS:
+ case Node_FS:
+ case Node_RS:
+ case Node_field_spec:
+ case Node_subscript:
+ case Node_IGNORECASE:
+ case Node_OFS:
+ case Node_ORS:
+ case Node_OFMT:
+ case Node_CONVFMT:
+ lhs = get_lhs(tree, (Func_ptr *) NULL);
+ return *lhs;
+
+ /* no break after fatal(): presumably fatal() does not return */
+ case Node_var_array:
+ fatal("attempt to use array `%s' in a scalar context",
+ tree->vname);
+
+ case Node_unary_minus:
+ t1 = tree_eval(tree->subnode);
+ x = -force_number(t1);
+ free_temp(t1);
+ return tmp_number(x);
+
+ /* ?: -- rnode holds the pair of alternatives; only the chosen one is evaluated */
+ case Node_cond_exp:
+ if (eval_condition(tree->lnode))
+ return tree_eval(tree->rnode->lnode);
+ return tree_eval(tree->rnode->rnode);
+
+ case Node_match:
+ case Node_nomatch:
+ case Node_regex:
+ return match_op(tree);
+
+ case Node_func:
+ fatal("function `%s' called with space between name and (,\n%s",
+ tree->lnode->param,
+ "or used in other expression context");
+
+ /* assignments */
+ case Node_assign:
+ {
+ Func_ptr after_assign = NULL;
+
+ if (iscond && do_lint)
+ warning("assignment used in conditional context");
+ /* the right-hand side is evaluated before the target is located */
+ r = tree_eval(tree->rnode);
+ lhs = get_lhs(tree->lnode, &after_assign);
+ /* skip the store when the value is already the target's node (e.g. x = x) */
+ if (r != *lhs) {
+ NODE *save;
+
+ /* take the new reference before dropping the old one */
+ save = *lhs;
+ *lhs = dupnode(r);
+ unref(save);
+ }
+ free_temp(r);
+ tree->lnode->flags |= SCALAR;
+ /* run the hook get_lhs() supplied for this target, if any */
+ if (after_assign)
+ (*after_assign)();
+ return *lhs;
+ }
+
+ case Node_concat:
+ {
+ NODE **treelist;
+ NODE **strlist;
+ NODE *save_tree;
+ register NODE **treep;
+ register NODE **strp;
+ register size_t len;
+ char *str;
+ register char *dest;
+ int count;
+
+ /*
+ * This is an efficiency hack for multiple adjacent string
+ * concatenations, to avoid recursion and string copies.
+ *
+ * Node_concat trees grow downward to the left, so
+ * descend to lowest (first) node, accumulating nodes
+ * to evaluate to strings as we go.
+ */
+
+ /*
+ * But first, no arbitrary limits. Count the number of
+ * nodes and malloc the treelist and strlist arrays.
+ * There will be count + 1 items to concatenate. We
+ * also leave room for an extra pointer at the end to
+ * use as a sentinel. Thus, start count at 2.
+ */
+ save_tree = tree;
+ for (count = 2; tree && tree->type == Node_concat; tree = tree->lnode)
+ count++;
+ tree = save_tree;
+ emalloc(treelist, NODE **, sizeof(NODE *) * count, "tree_eval");
+ emalloc(strlist, NODE **, sizeof(NODE *) * count, "tree_eval");
+
+ /* Now, here we go. */
+ /* treelist receives the right operands from last to first, then the leftmost operand */
+ treep = treelist;
+ while (tree && tree->type == Node_concat) {
+ *treep++ = tree->rnode;
+ tree = tree->lnode;
+ }
+ *treep = tree;
+ /*
+ * Now, evaluate to strings in LIFO order, accumulating
+ * the string length, so we can do a single malloc at the
+ * end.
+ */
+ strp = strlist;
+ len = 0;
+ while (treep >= treelist) {
+ *strp = force_string(tree_eval(*treep--));
+ len += (*strp)->stlen;
+ strp++;
+ }
+ /* NULL sentinel terminates the copy loop below */
+ *strp = NULL;
+ emalloc(str, char *, len+2, "tree_eval");
+ str[len] = str[len+1] = '\0'; /* for good measure */
+ dest = str;
+ strp = strlist;
+ while (*strp) {
+ memcpy(dest, (*strp)->stptr, (*strp)->stlen);
+ dest += (*strp)->stlen;
+ free_temp(*strp);
+ strp++;
+ }
+ /* the new node takes over str (ALREADY_MALLOCED) and is marked temporary */
+ r = make_str_node(str, len, ALREADY_MALLOCED);
+ r->flags |= TEMP;
+
+ free(strlist);
+ free(treelist);
+ }
+ return r;
+
+ /* other assignment types are easier because they are numeric */
+ case Node_preincrement:
+ case Node_predecrement:
+ case Node_postincrement:
+ case Node_postdecrement:
+ case Node_assign_exp:
+ case Node_assign_times:
+ case Node_assign_quotient:
+ case Node_assign_mod:
+ case Node_assign_plus:
+ case Node_assign_minus:
+ return op_assign(tree);
+ default:
+ break; /* handled below */
+ }
+
+ /* evaluate subtrees in order to do binary operation, then keep going */
+ t1 = tree_eval(tree->lnode);
+ t2 = tree_eval(tree->rnode);
+
+ /* stage 2: relational operators, decided by the sign of cmp_nodes() */
+ switch (tree->type) {
+ case Node_geq:
+ case Node_leq:
+ case Node_greater:
+ case Node_less:
+ case Node_notequal:
+ case Node_equal:
+ di = cmp_nodes(t1, t2);
+ free_temp(t1);
+ free_temp(t2);
+ switch (tree->type) {
+ case Node_equal:
+ return tmp_number((AWKNUM) (di == 0));
+ case Node_notequal:
+ return tmp_number((AWKNUM) (di != 0));
+ case Node_less:
+ return tmp_number((AWKNUM) (di < 0));
+ case Node_greater:
+ return tmp_number((AWKNUM) (di > 0));
+ case Node_leq:
+ return tmp_number((AWKNUM) (di <= 0));
+ case Node_geq:
+ return tmp_number((AWKNUM) (di >= 0));
+ default:
+ cant_happen();
+ }
+ break;
+ default:
+ break; /* handled below */
+ }
+
+ /* stage 3: arithmetic on the numeric values of both operands */
+ x1 = force_number(t1);
+ free_temp(t1);
+ x2 = force_number(t2);
+ free_temp(t2);
+ switch (tree->type) {
+ case Node_exp:
+ /* (lx = x2) == x2 holds only when x2 survives conversion to long unchanged */
+ if ((lx = x2) == x2 && lx >= 0) { /* integer exponent */
+ if (lx == 0)
+ x = 1;
+ else if (lx == 1)
+ x = x1;
+ else {
+ /* doing it this way should be more precise */
+ for (x = x1; --lx; )
+ x *= x1;
+ }
+ } else
+ x = pow((double) x1, (double) x2);
+ return tmp_number(x);
+
+ case Node_times:
+ return tmp_number(x1 * x2);
+
+ case Node_quotient:
+ if (x2 == 0)
+ fatal("division by zero attempted");
+#ifdef _CRAY
+ /* special case for integer division, put in for Cray */
+ /* use the long quotient only when multiplying it back gives x1 exactly */
+ lx2 = x2;
+ if (lx2 == 0)
+ return tmp_number(x1 / x2);
+ lx = (long) x1 / lx2;
+ if (lx * x2 == x1)
+ return tmp_number((AWKNUM) lx);
+ else
+#endif
+ return tmp_number(x1 / x2);
+
+ case Node_mod:
+ if (x2 == 0)
+ fatal("division by zero attempted in mod");
+#ifdef HAVE_FMOD
+ return tmp_number(fmod(x1, x2));
+#else /* ! HAVE_FMOD */
+ /* modf() leaves the integral part of the quotient in x */
+ (void) modf(x1 / x2, &x);
+ return tmp_number(x1 - x * x2);
+#endif /* ! HAVE_FMOD */
+
+ case Node_plus:
+ return tmp_number(x1 + x2);
+
+ case Node_minus:
+ return tmp_number(x1 - x2);
+
+ case Node_var_array:
+ fatal("attempt to use array `%s' in a scalar context",
+ tree->vname);
+
+ default:
+ fatal("illegal type (%s) in tree_eval", nodetype2str(tree->type));
+ }
+ /* reached only if a case above falls out of the switch, i.e. if fatal() returned */
+ return 0;
+}
+
+/*
+ * eval_condition --- decide whether TREE is true or false
+ *
+ * Returns FALSE (0) for false and non-zero for true.  A NULL tree
+ * counts as true.
+ */
+
+static int
+eval_condition(tree)
+register NODE *tree;
+{
+	register NODE *val;
+	register int truth;
+
+	/* No condition at all: trivially true. */
+	if (tree == NULL)
+		return TRUE;
+
+	if (tree->type != Node_line_range) {
+		/*
+		 * An ordinary expression: the null string and numeric zero
+		 * are false, everything else is true.
+		 */
+		val = m_tree_eval(tree, TRUE);
+		if ((val->flags & MAYBE_NUM) != 0)
+			(void) force_number(val);
+		if ((val->flags & NUMBER) != 0)
+			truth = (val->numbr != 0.0);
+		else
+			truth = (val->stlen != 0);
+		free_temp(val);
+		return truth;
+	}
+
+	/*
+	 * Range pattern.  The condpair field holds the two patterns and
+	 * the triggered word remembers whether we are inside a range.
+	 *
+	 * While not triggered, only the start pattern (lnode) is tested;
+	 * a failure means the record is outside the range.  A success
+	 * sets the triggered word.
+	 *
+	 * Once triggered -- including on the very record that started the
+	 * range, which is why this is deliberately not an "else" -- the
+	 * end pattern (rnode) is tested and, if it succeeds, the range is
+	 * closed.  Either way the current record belongs to the range, so
+	 * the result is true.  This lets a range begin and end on a
+	 * single input record.
+	 */
+	if (! tree->triggered) {
+		if (! eval_condition(tree->condpair->lnode))
+			return FALSE;
+		tree->triggered = TRUE;
+	}
+	if (eval_condition(tree->condpair->rnode))
+		tree->triggered = FALSE;
+	return TRUE;
+}
+
+/*
+ * cmp_nodes --- compare two nodes
+ *
+ * Returns a negative value, zero, or a positive value when t1 sorts
+ * before, equal to, or after t2.  Two values that are both numbers are
+ * compared numerically; otherwise both are converted to strings and
+ * compared bytewise (through casetable when IGNORECASE is set), with
+ * the length difference breaking ties on a common prefix.
+ */
+
+int
+cmp_nodes(t1, t2)
+register NODE *t1, *t2;
+{
+	register int result;
+	register size_t len1, len2;
+	register int common;
+	int lendiff;
+
+	/* The very same node always compares equal to itself. */
+	if (t1 == t2)
+		return 0;
+
+	/* Settle the numeric status of values that merely look numeric. */
+	if ((t1->flags & MAYBE_NUM) != 0)
+		(void) force_number(t1);
+	if ((t2->flags & MAYBE_NUM) != 0)
+		(void) force_number(t2);
+
+	if ((t1->flags & NUMBER) != 0 && (t2->flags & NUMBER) != 0) {
+		if (t1->numbr == t2->numbr)
+			return 0;
+		/* don't subtract, in case one or both are infinite */
+		return (t1->numbr < t2->numbr) ? -1 : 1;
+	}
+
+	/* String comparison from here on. */
+	(void) force_string(t1);
+	(void) force_string(t2);
+	len1 = t1->stlen;
+	len2 = t2->stlen;
+	lendiff = len1 - len2;
+
+	/* With an empty string on either side only the lengths matter. */
+	if (len1 == 0 || len2 == 0)
+		return lendiff;
+
+	/* Compare over the length of the shorter string. */
+	common = (lendiff <= 0 ? len1 : len2);
+	if (! IGNORECASE)
+		result = memcmp(t1->stptr, t2->stptr, common);
+	else {
+		register unsigned char *p1 = (unsigned char *) t1->stptr;
+		register unsigned char *p2 = (unsigned char *) t2->stptr;
+
+		result = 0;
+		while (common-- > 0 && result == 0) {
+			result = casetable[*p1] - casetable[*p2];
+			p1++;
+			p2++;
+		}
+	}
+
+	if (result != 0)
+		return result;
+	return lendiff;
+}
+
+/*
+ * op_assign --- do +=, -=, etc.
+ *
+ * Handles pre/post increment and decrement as well as the compound
+ * assignments ^=, *=, /=, %=, += and -=.  tree->lnode is the target and
+ * tree->rnode the right-hand operand (unused for ++ and --).
+ *
+ * Returns the node now stored in the target, except for the postfix
+ * operators, which return a temporary holding the previous value.
+ * A zero divisor in /= or %= is fatal.
+ */
+
+static NODE *
+op_assign(tree)
+register NODE *tree;
+{
+	AWKNUM rval, lval;
+	NODE **lhs;
+	AWKNUM t1, t2;
+	long ltemp;
+	NODE *tmp;
+	Func_ptr after_assign = NULL;
+
+	lhs = get_lhs(tree->lnode, &after_assign);
+	lval = force_number(*lhs);
+
+	/*
+	 * Can't unref *lhs until we know the type; doing so
+	 * too early breaks x += x sorts of things.
+	 */
+	switch(tree->type) {
+	case Node_preincrement:
+	case Node_predecrement:
+		unref(*lhs);
+		*lhs = make_number(lval +
+			       (tree->type == Node_preincrement ? 1.0 : -1.0));
+		tree->lnode->flags |= SCALAR;
+		if (after_assign)
+			(*after_assign)();
+		return *lhs;
+
+	case Node_postincrement:
+	case Node_postdecrement:
+		unref(*lhs);
+		*lhs = make_number(lval +
+			       (tree->type == Node_postincrement ? 1.0 : -1.0));
+		tree->lnode->flags |= SCALAR;
+		if (after_assign)
+			(*after_assign)();
+		/* postfix operators yield the value from before the update */
+		return tmp_number(lval);
+	default:
+		break;	/* handled below */
+	}
+
+	tmp = tree_eval(tree->rnode);
+	rval = force_number(tmp);
+	free_temp(tmp);
+
+	/*
+	 * Do this again; the lhs and the rhs could both be fields.
+	 * Accessing the rhs could cause the lhs to have moved around.
+	 * (Yet another special case. Gack.)
+	 */
+	lhs = get_lhs(tree->lnode, &after_assign);
+
+	unref(*lhs);
+	switch(tree->type) {
+	case Node_assign_exp:
+		/*
+		 * Repeated multiplication is only valid for a non-negative
+		 * integral exponent.  A negative one must go through pow():
+		 * the --ltemp loop below would otherwise count away from
+		 * zero and never terminate.  This is the same test that
+		 * r_tree_eval() applies for the `^' operator.
+		 */
+		if ((ltemp = rval) == rval && ltemp >= 0) {	/* integer exponent */
+			if (ltemp == 0)
+				*lhs = make_number((AWKNUM) 1);
+			else if (ltemp == 1)
+				*lhs = make_number(lval);
+			else {
+				/* doing it this way should be more precise */
+				for (t1 = t2 = lval; --ltemp; )
+					t1 *= t2;
+				*lhs = make_number(t1);
+			}
+		} else
+			*lhs = make_number((AWKNUM) pow((double) lval, (double) rval));
+		break;
+
+	case Node_assign_times:
+		*lhs = make_number(lval * rval);
+		break;
+
+	case Node_assign_quotient:
+		if (rval == (AWKNUM) 0)
+			fatal("division by zero attempted in /=");
+#ifdef _CRAY
+		/* special case for integer division, put in for Cray */
+		ltemp = rval;
+		if (ltemp == 0) {
+			*lhs = make_number(lval / rval);
+			break;
+		}
+		ltemp = (long) lval / ltemp;
+		/*
+		 * The integer quotient is exact only if multiplying it by
+		 * the divisor gives back the dividend (quotient * rval ==
+		 * lval), as in the Node_quotient case of r_tree_eval().
+		 */
+		if (ltemp * rval == lval)
+			*lhs = make_number((AWKNUM) ltemp);
+		else
+#endif	/* _CRAY */
+		*lhs = make_number(lval / rval);
+		break;
+
+	case Node_assign_mod:
+		if (rval == (AWKNUM) 0)
+			fatal("division by zero attempted in %%=");
+#ifdef HAVE_FMOD
+		*lhs = make_number(fmod(lval, rval));
+#else	/* ! HAVE_FMOD */
+		/* modf() leaves the integral part of the quotient in t1 */
+		(void) modf(lval / rval, &t1);
+		t2 = lval - rval * t1;
+		*lhs = make_number(t2);
+#endif	/* ! HAVE_FMOD */
+		break;
+
+	case Node_assign_plus:
+		*lhs = make_number(lval + rval);
+		break;
+
+	case Node_assign_minus:
+		*lhs = make_number(lval - rval);
+		break;
+	default:
+		cant_happen();
+	}
+	tree->lnode->flags |= SCALAR;
+	/* run the hook get_lhs() supplied for this target, if any */
+	if (after_assign)
+		(*after_assign)();
+	return *lhs;
+}
+
+/*
+ * Bookkeeping for awk function calls in progress.  push_args() fills in
+ * one struct fcall per call and pop_fcall() tears it down again.  The
+ * records live in the fcall_list array, which push_args() grows on demand.
+ */
+static struct fcall {
+ char *fname; /* name of the called function; push_args() notes it is kept for debugging only */
+ unsigned long count; /* number of slots in stack[] */
+ NODE *arglist; /* the caller's argument list */
+ NODE **prevstack; /* value of stack_ptr before this call; restored by pop_fcall() */
+ NODE **stack; /* malloc'ed array of parameter nodes for this call (NULL if count is 0) */
+} *fcall_list = NULL;
+
+static long fcall_list_size = 0; /* number of entries allocated in fcall_list */
+static long curfcall = -1; /* index of the innermost active call, -1 when none */
+
+/*
+ * in_function --- report whether any awk function call is active
+ *
+ * Returns non-zero when there are calls recorded in fcall_list that
+ * would have to be unwound, zero otherwise.
+ */
+
+static int
+in_function()
+{
+	if (curfcall < 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * pop_fcall --- pop off a single function call
+ *
+ * Undoes the innermost push_args(): restores stack_ptr to the caller's
+ * parameter stack, copies array contents back to array arguments, frees
+ * the parameter nodes and the stack array, and decrements curfcall.
+ */
+
+static void
+pop_fcall()
+{
+ NODE *n, **sp, *arg, *argp;
+ int count;
+ struct fcall *f;
+
+ assert(curfcall >= 0);
+ f = & fcall_list[curfcall];
+ /* from here on stack_ptr is the caller's stack, so param_cnt lookups below resolve in the caller */
+ stack_ptr = f->prevstack;
+
+ /*
+ * here, we pop each parameter and check whether
+ * it was an array. If so, and if the arg. passed in was
+ * a simple variable, then the value should be copied back.
+ * This achieves "call-by-reference" for arrays.
+ */
+ sp = f->stack;
+ count = f->count;
+
+ /* first pass: parameters that have a matching argument in the call */
+ for (argp = f->arglist; count > 0 && argp != NULL; argp = argp->rnode) {
+ arg = argp->lnode;
+ if (arg->type == Node_param_list)
+ arg = stack_ptr[arg->param_cnt];
+ n = *sp++;
+ if ((arg->type == Node_var || arg->type == Node_var_array)
+ && n->type == Node_var_array) {
+ /* should we free arg->var_value ? */
+ /* hand the array over to the caller's variable, turning it into an array */
+ arg->var_array = n->var_array;
+ arg->type = Node_var_array;
+ arg->array_size = n->array_size;
+ arg->table_size = n->table_size;
+ arg->flags = n->flags;
+ }
+ /* n->lnode overlays the array size, don't unref it if array */
+ if (n->type != Node_var_array)
+ unref(n->lnode);
+ freenode(n);
+ count--;
+ }
+ /* second pass: parameters with no matching argument, i.e. locals */
+ while (count-- > 0) {
+ n = *sp++;
+ /* if n is a local array, all the elements should be freed */
+ if (n->type == Node_var_array)
+ assoc_clear(n);
+ /*
+ * NOTE(review): unlike the first pass, lnode is unref'ed here even
+ * when n is an array -- confirm this is safe given that lnode
+ * overlays the array size.
+ */
+ unref(n->lnode);
+ freenode(n);
+ }
+ if (f->stack)
+ free((char *) f->stack);
+ /* wipe the record so that no stale pointers remain in fcall_list */
+ memset(f, '\0', sizeof(struct fcall));
+ curfcall--;
+}
+
+/*
+ * pop_fcall_stack --- unwind every active function call
+ *
+ * Pops call records one at a time until none are left, so that the
+ * arguments of all pending calls are released and no memory leaks.
+ */
+
+static void
+pop_fcall_stack()
+{
+	while (in_function()) {
+		pop_fcall();
+	}
+}
+
+/*
+ * push_args --- push function arguments onto the stack
+ *
+ * count is the number of parameters the function declares, arglist the
+ * caller's argument expressions, oldstack the parameter stack in effect
+ * in the caller (the value of stack_ptr at the time of the call), and
+ * func_name the function's name, used in the warning below.
+ *
+ * A new call record is appended to fcall_list and a stack of `count'
+ * parameter nodes is built: one per supplied argument (arrays are
+ * passed by reference, everything else is evaluated and copied), and
+ * one holding the null string for each parameter without an argument.
+ * On return stack_ptr points at the new stack.
+ */
+
+static void
+push_args(count, arglist, oldstack, func_name)
+int count;
+NODE *arglist;
+NODE **oldstack;
+char *func_name;
+{
+	struct fcall *f;
+	NODE *arg, *argp, *r, **sp, *n;
+
+	if (fcall_list_size == 0) {	/* first time */
+		emalloc(fcall_list, struct fcall *, 10 * sizeof(struct fcall),
+			"push_args");
+		fcall_list_size = 10;
+	}
+
+	if (++curfcall >= fcall_list_size) {
+		fcall_list_size *= 2;
+		erealloc(fcall_list, struct fcall *,
+			fcall_list_size * sizeof(struct fcall), "push_args");
+	}
+	f = & fcall_list[curfcall];
+	memset(f, '\0', sizeof(struct fcall));
+
+	if (count > 0)
+		emalloc(f->stack, NODE **, count*sizeof(NODE *), "func_call");
+	f->count = count;
+	f->fname = func_name;	/* not used, for debugging, just in case */
+	f->arglist = arglist;
+	f->prevstack = oldstack;
+
+	/*
+	 * NOTE(review): f->count is set before the slots of f->stack are
+	 * filled in.  If evaluating an argument below unwinds through
+	 * pop_fcall_stack(), pop_fcall() would walk slots that were never
+	 * initialized -- confirm whether that path can occur.
+	 */
+	sp = f->stack;
+
+	/* for each calling arg. add NODE * on stack */
+	for (argp = arglist; count > 0 && argp != NULL; argp = argp->rnode) {
+		arg = argp->lnode;
+		getnode(r);
+		r->type = Node_var;
+
+		/* call by reference for arrays; see below also */
+		/*
+		 * Look the parameter up through oldstack rather than
+		 * f->prevstack: a tree_eval() in an earlier iteration may
+		 * have called another function and realloc()'ed fcall_list,
+		 * leaving f pointing into freed space (see the comment at
+		 * the end of this function).  oldstack holds the same value
+		 * and cannot go stale.
+		 */
+		if (arg->type == Node_param_list)
+			arg = oldstack[arg->param_cnt];
+		if (arg->type == Node_var_array)
+			*r = *arg;
+		else {
+			n = tree_eval(arg);
+			r->lnode = dupnode(n);
+			r->rnode = (NODE *) NULL;
+			if ((n->flags & SCALAR) != 0)
+				r->flags |= SCALAR;
+			free_temp(n);
+		}
+		*sp++ = r;
+		count--;
+	}
+	if (argp != NULL)	/* left over calling args. */
+		warning(
+		    "function `%s' called with more arguments than declared",
+		    func_name);
+
+	/* add remaining params. on stack with null value */
+	while (count-- > 0) {
+		getnode(r);
+		r->type = Node_var;
+		r->lnode = Nnull_string;
+		r->flags &= ~SCALAR;
+		r->rnode = (NODE *) NULL;
+		*sp++ = r;
+	}
+
+	/*
+	 * We have to reassign f. Why, you may ask? It is possible that
+	 * other functions were called during the course of tree_eval()-ing
+	 * the arguments to this function. As a result of that, fcall_list
+	 * may have been realloc()'ed, with the result that f is now
+	 * pointing into free()'d space. This was a nasty one to track down.
+	 */
+	f = & fcall_list[curfcall];
+
+	stack_ptr = f->stack;
+}
+
+/*
+ * func_call --- call a function, call by reference for arrays
+ *
+ * Looks up the function named by `name', pushes its arguments, runs the
+ * body and returns the function's value (the null string unless a
+ * return statement set something else).  An undefined function is fatal.
+ */
+
+/* parameter stack of the innermost active call: set by push_args(), restored by pop_fcall() */
+NODE **stack_ptr;
+
+static NODE *
+func_call(name, arg_list)
+NODE *name; /* name is a Node_val giving function name */
+NODE *arg_list; /* Node_expression_list of calling args. */
+{
+ register NODE *r;
+ NODE *f;
+ jmp_buf volatile func_tag_stack;
+ jmp_buf volatile loop_tag_stack;
+ int volatile save_loop_tag_valid = FALSE;
+ NODE *save_ret_node;
+ extern NODE *ret_node;
+
+ /* retrieve function definition node */
+ f = lookup(name->stptr);
+ if (f == NULL || f->type != Node_func)
+ fatal("function `%s' not defined", name->stptr);
+#ifdef FUNC_TRACE
+ fprintf(stderr, "function %s called\n", name->stptr);
+#endif
+ /* the current stack_ptr is recorded as the caller's stack for this call */
+ push_args(f->lnode->param_cnt, arg_list, stack_ptr, name->stptr);
+
+ /*
+ * Execute function body, saving context, as a return statement
+ * will longjmp back here.
+ *
+ * Have to save and restore the loop_tag stuff so that a return
+ * inside a loop in a function body doesn't scrog any loops going
+ * on in the main program. We save the necessary info in variables
+ * local to this function so that function nesting works OK.
+ * We also only bother to save the loop stuff if we're in a loop
+ * when the function is called.
+ */
+ if (loop_tag_valid) {
+ /* junk is a throw-away stand-in for PUSH_BINDING's third argument; the real flag is saved by hand above it */
+ int junk = 0;
+
+ save_loop_tag_valid = (volatile int) loop_tag_valid;
+ PUSH_BINDING(loop_tag_stack, loop_tag, junk);
+ /* the function body starts out as "not inside any loop" */
+ loop_tag_valid = FALSE;
+ }
+ PUSH_BINDING(func_tag_stack, func_tag, func_tag_valid);
+ save_ret_node = ret_node;
+ ret_node = Nnull_string; /* default return value */
+ /* setjmp() returns non-zero when a return statement longjmp()s here; the body is not run again */
+ if (setjmp(func_tag) == 0)
+ (void) interpret(f->rnode);
+
+ /* pick up the result, then put back the enclosing call's pending return value */
+ r = ret_node;
+ ret_node = (NODE *) save_ret_node;
+ RESTORE_BINDING(func_tag_stack, func_tag, func_tag_valid);
+ pop_fcall();
+
+ /* Restore the loop_tag stuff if necessary. */
+ if (save_loop_tag_valid) {
+ int junk = 0;
+
+ loop_tag_valid = (int) save_loop_tag_valid;
+ RESTORE_BINDING(loop_tag_stack, loop_tag, junk);
+ }
+
+ /* unless the result is a permanent node, mark it temporary */
+ if ((r->flags & PERM) == 0)
+ r->flags |= TEMP;
+ return r;
+}
+
+/*
+ * r_get_lhs:
+ * This returns a POINTER to a node pointer. get_lhs(ptr) is the current
+ * value of the var, or where to store the var's new value
+ *
+ * ptr is the node describing the target.  If assign is not NULL, *assign
+ * is set to a function the caller should call after storing a new value
+ * (so the variable's side effects take place), or to NULL when no such
+ * hook is set for the target.  Targets that cannot be used as a scalar
+ * lvalue (arrays, functions, builtins) are fatal errors.
+ */
+
+NODE **
+r_get_lhs(ptr, assign)
+register NODE *ptr;
+Func_ptr *assign;
+{
+ register NODE **aptr = NULL;
+ register NODE *n;
+
+ if (assign)
+ *assign = NULL; /* for safety */
+ /* a function parameter: work on the node in the current call's stack slot */
+ if (ptr->type == Node_param_list)
+ ptr = stack_ptr[ptr->param_cnt];
+
+ switch (ptr->type) {
+ case Node_var_array:
+ fatal("attempt to use array `%s' in a scalar context",
+ ptr->vname);
+
+ case Node_var:
+ aptr = &(ptr->var_value);
+#ifdef DEBUG
+ if (ptr->var_value->stref <= 0)
+ cant_happen();
+#endif
+ break;
+
+ /* special variables: return the address of the value and hand back the matching set_XXX hook */
+ case Node_FIELDWIDTHS:
+ aptr = &(FIELDWIDTHS_node->var_value);
+ if (assign != NULL)
+ *assign = set_FIELDWIDTHS;
+ break;
+
+ case Node_RS:
+ aptr = &(RS_node->var_value);
+ if (assign != NULL)
+ *assign = set_RS;
+ break;
+
+ case Node_FS:
+ aptr = &(FS_node->var_value);
+ if (assign != NULL)
+ *assign = set_FS;
+ break;
+
+ /* FNR, NR and NF: the node's value is rebuilt from the C variable on every access */
+ case Node_FNR:
+ unref(FNR_node->var_value);
+ FNR_node->var_value = make_number((AWKNUM) FNR);
+ aptr = &(FNR_node->var_value);
+ if (assign != NULL)
+ *assign = set_FNR;
+ break;
+
+ case Node_NR:
+ unref(NR_node->var_value);
+ NR_node->var_value = make_number((AWKNUM) NR);
+ aptr = &(NR_node->var_value);
+ if (assign != NULL)
+ *assign = set_NR;
+ break;
+
+ case Node_NF:
+ /* NF == -1: presumably the record has not been split into fields yet */
+ if (NF == -1)
+ (void) get_field(HUGE-1, assign); /* parse record */
+ unref(NF_node->var_value);
+ NF_node->var_value = make_number((AWKNUM) NF);
+ aptr = &(NF_node->var_value);
+ if (assign != NULL)
+ *assign = set_NF;
+ break;
+
+ case Node_IGNORECASE:
+ aptr = &(IGNORECASE_node->var_value);
+ if (assign != NULL)
+ *assign = set_IGNORECASE;
+ break;
+
+ case Node_OFMT:
+ aptr = &(OFMT_node->var_value);
+ if (assign != NULL)
+ *assign = set_OFMT;
+ break;
+
+ case Node_CONVFMT:
+ aptr = &(CONVFMT_node->var_value);
+ if (assign != NULL)
+ *assign = set_CONVFMT;
+ break;
+
+ case Node_ORS:
+ aptr = &(ORS_node->var_value);
+ if (assign != NULL)
+ *assign = set_ORS;
+ break;
+
+ case Node_OFS:
+ aptr = &(OFS_node->var_value);
+ if (assign != NULL)
+ *assign = set_OFS;
+ break;
+
+ /*
+ * NOTE(review): parameters were already replaced by their stack slot
+ * above, so this case is reached only if the slot itself holds a
+ * Node_param_list node -- confirm whether that can happen.
+ */
+ case Node_param_list:
+ aptr = &(stack_ptr[ptr->param_cnt]->var_value);
+ break;
+
+ case Node_field_spec:
+ {
+ int field_num;
+
+ /* the field number is itself an expression */
+ n = tree_eval(ptr->lnode);
+ field_num = (int) force_number(n);
+ free_temp(n);
+ if (field_num < 0)
+ fatal("attempt to access field %d", field_num);
+ if (field_num == 0 && field0_valid) { /* short circuit */
+ aptr = &fields_arr[0];
+ if (assign != NULL)
+ *assign = reset_record;
+ break;
+ }
+ aptr = get_field(field_num, assign);
+ break;
+ }
+ case Node_subscript:
+ n = ptr->lnode;
+ if (n->type == Node_param_list) {
+ /* i is the 1-based parameter number, used only in the message */
+ int i = n->param_cnt + 1;
+
+ n = stack_ptr[n->param_cnt];
+ if ((n->flags & SCALAR) != 0)
+ fatal("attempt to use scalar parameter %d as an array", i);
+ } else if (n->type == Node_func) {
+ fatal("attempt to use function `%s' as array",
+ n->lnode->param);
+ }
+ /* the subscript expression list is passed through concat_exp() to form the lookup key */
+ aptr = assoc_lookup(n, concat_exp(ptr->rnode));
+ break;
+
+ case Node_func:
+ fatal("`%s' is a function, assignment is not allowed",
+ ptr->lnode->param);
+
+ case Node_builtin:
+ fatal("assignment is not allowed to result of builtin function");
+ default:
+ cant_happen();
+ }
+ return aptr;
+}
+
+/*
+ * match_op --- do ~ and !~
+ *
+ * Also handles a bare regular expression (Node_regex), which is matched
+ * against field 0.  Returns a temporary number: 1 when the outcome
+ * agrees with the operator (a match for ~ or a bare regex, no match
+ * for !~), 0 otherwise.
+ */
+
+static NODE *
+match_op(tree)
+register NODE *tree;
+{
+	register NODE *subject;
+	register Regexp *re;
+	int want_match = (tree->type != Node_nomatch);
+	int need_start;	/* FIXME: --- see below */
+	int found;
+
+	if (tree->type != Node_regex) {
+		/* explicit operator: left side is the text, right side the regex */
+		subject = force_string(tree_eval(tree->lnode));
+		tree = tree->rnode;
+	} else
+		subject = *get_field(0, (Func_ptr *) 0);
+
+	re = re_update(tree);
+
+	/*
+	 * FIXME:
+	 *
+	 * Any place where research() is called with a last parameter of
+	 * FALSE, we need to use the avoid_dfa test. This is the only place
+	 * at the moment.
+	 *
+	 * A new or improved dfa that distinguishes beginning/end of
+	 * string from beginning/end of line will allow us to get rid of
+	 * this temporary hack.
+	 *
+	 * The avoid_dfa() function is in re.c; it is not very smart.
+	 */
+	if (avoid_dfa(tree, subject->stptr, subject->stlen))
+		need_start = TRUE;
+	else
+		need_start = FALSE;
+
+	/* research() reports failure to match as -1 */
+	found = (research(re, subject->stptr, 0, subject->stlen, need_start) != -1);
+	free_temp(subject);
+	return tmp_number((AWKNUM) (found == want_match));
+}
+
+/*
+ * set_IGNORECASE --- update IGNORECASE as appropriate
+ *
+ * Recomputes the C-level IGNORECASE flag from the awk variable: a
+ * string value that does not look numeric is true when non-empty, any
+ * other string or number is true when non-zero.  In traditional mode
+ * the flag is always off.  A warning is issued once in lint or
+ * traditional mode.
+ */
+
+void
+set_IGNORECASE()
+{
+	static int warned = FALSE;
+	NODE *val;
+
+	if (! warned && (do_lint || do_traditional)) {
+		warned = TRUE;
+		warning("IGNORECASE not supported in compatibility mode");
+	}
+
+	if (do_traditional)
+		IGNORECASE = FALSE;
+	else {
+		val = IGNORECASE_node->var_value;
+		if ((val->flags & (STRING|STR)) != 0
+		    && (val->flags & MAYBE_NUM) == 0)
+			IGNORECASE = (force_string(val)->stlen > 0);
+		else if ((val->flags & (STRING|STR|NUM|NUMBER)) != 0)
+			IGNORECASE = (force_number(val) != 0.0);
+		else
+			IGNORECASE = FALSE;	/* shouldn't happen */
+	}
+
+	/* then let the field-splitting setup be refreshed */
+	set_FS_if_not_FIELDWIDTHS();
+}
+
+/*
+ * set_OFS --- update OFS related variables when OFS assigned to
+ *
+ * Caches the string value of OFS and its length in the C variables OFS
+ * and OFSlen, and makes sure the cached string is NUL-terminated.
+ */
+
+void
+set_OFS()
+{
+	char *sep;
+
+	sep = force_string(OFS_node->var_value)->stptr;
+	OFSlen = OFS_node->var_value->stlen;
+	sep[OFSlen] = '\0';
+	OFS = sep;
+}
+
+/*
+ * set_ORS --- update ORS related variables when ORS assigned to
+ *
+ * Caches the string value of ORS and its length in the C variables ORS
+ * and ORSlen, and makes sure the cached string is NUL-terminated.
+ */
+
+void
+set_ORS()
+{
+	char *sep;
+
+	sep = force_string(ORS_node->var_value)->stptr;
+	ORSlen = ORS_node->var_value->stlen;
+	sep[ORSlen] = '\0';
+	ORS = sep;
+}
+
+/*
+ * fmt_ok --- is the conversion format a valid one?
+ *
+ * Returns 1 when the string value of n has the form
+ *	% [flags from " +-#"] [width digits] . [precision digits] <one of efgEG>
+ * with nothing after the conversion letter, and 0 otherwise.
+ *
+ * NOTE(review): as written the `.' is mandatory, so a format such as
+ * "%g" is reported as invalid -- confirm that this is intended.
+ */
+
+NODE **fmt_list = NULL;
+static int fmt_ok P((NODE *n));
+static int fmt_index P((NODE *n));
+
+static int
+fmt_ok(n)
+NODE *n;
+{
+	NODE *tmp = force_string(n);
+	char *p = tmp->stptr;
+
+	/*
+	 * The <ctype.h> functions are only defined for values representable
+	 * as unsigned char (and EOF), so every character is cast before
+	 * being handed to isdigit(); a plain char may be negative.
+	 */
+	if (*p++ != '%')
+		return 0;
+	while (*p && strchr(" +-#", *p) != NULL)	/* flags */
+		p++;
+	while (*p && isdigit((unsigned char) *p))	/* width - %*.*g is NOT allowed */
+		p++;
+	if (*p == '\0' || (*p != '.' && ! isdigit((unsigned char) *p)))
+		return 0;
+	if (*p == '.')
+		p++;
+	while (*p && isdigit((unsigned char) *p))	/* precision */
+		p++;
+	if (*p == '\0' || strchr("efgEG", *p) == NULL)
+		return 0;
+	/* nothing may follow the conversion letter */
+	if (*++p != '\0')
+		return 0;
+	return 1;
+}
+
+/*
+ * fmt_index --- track values of OFMT and CONVFMT to keep semantics correct
+ *
+ * Every distinct format string seen so far is remembered in fmt_list.
+ * Returns the index of n's string value in that list, adding a new
+ * entry (a dupnode() of n) when it is not there yet.  With lint
+ * enabled, a new format that fmt_ok() rejects draws a warning.
+ */
+
+static int
+fmt_index(n)
+NODE *n;
+{
+	register int ix = 0;
+	static int fmt_num = 4;		/* number of slots allocated in fmt_list */
+	static int fmt_hiwater = 0;	/* number of slots in use */
+
+	if (fmt_list == NULL)
+		emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
+	(void) force_string(n);
+	while (ix < fmt_hiwater) {
+		if (cmp_nodes(fmt_list[ix], n) == 0)
+			return ix;
+		ix++;
+	}
+	/* not found */
+	n->stptr[n->stlen] = '\0';
+	if (do_lint && ! fmt_ok(n))
+		warning("bad %sFMT specification",
+			    n == CONVFMT_node->var_value ? "CONV"
+			  : n == OFMT_node->var_value ? "O"
+			  : "");
+
+	if (fmt_hiwater >= fmt_num) {
+		/*
+		 * Grow the table in place.  It must be resized, not
+		 * allocated afresh, so that the entries already recorded
+		 * (and the indices handed out for them) stay valid; and the
+		 * size is in bytes, i.e. slots times the size of one slot.
+		 */
+		fmt_num *= 2;
+		erealloc(fmt_list, NODE **, fmt_num * sizeof(*fmt_list), "fmt_index");
+	}
+	fmt_list[fmt_hiwater] = dupnode(n);
+	return fmt_hiwater++;
+}
+
+/*
+ * set_OFMT --- track OFMT correctly
+ *
+ * Registers the current value of OFMT with fmt_index() and caches both
+ * the resulting index and the registered format string.
+ */
+
+void
+set_OFMT()
+{
+	int slot = fmt_index(OFMT_node->var_value);
+
+	OFMTidx = slot;
+	OFMT = fmt_list[slot]->stptr;
+}
+
+/*
+ * set_CONVFMT --- track CONVFMT correctly
+ *
+ * Registers the current value of CONVFMT with fmt_index() and caches
+ * both the resulting index and the registered format string.
+ */
+
+void
+set_CONVFMT()
+{
+	int slot = fmt_index(CONVFMT_node->var_value);
+
+	CONVFMTidx = slot;
+	CONVFMT = fmt_list[slot]->stptr;
+}
diff --git a/contrib/awk/field.c b/contrib/awk/field.c
new file mode 100644
index 0000000..31c9628
--- /dev/null
+++ b/contrib/awk/field.c
@@ -0,0 +1,915 @@
+/*
+ * field.c - routines for dealing with fields and record parsing
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+#include <assert.h>
+
+typedef void (* Setfunc) P((long, char *, long, NODE *));
+
+static long (*parse_field) P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static void rebuild_record P((void));
+static long re_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static long def_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static long posix_def_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static long null_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static long sc_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static long fw_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static void set_element P((long num, char * str, long len, NODE *arr));
+static void grow_fields_arr P((long num));
+static void set_field P((long num, char *str, long len, NODE *dummy));
+
+
+static char *parse_extent; /* marks where to restart parse of record */
+static long parse_high_water = 0; /* field number that we have parsed so far */
+static long nf_high_water = 0; /* size of fields_arr */
+static int resave_fs;
+static NODE *save_FS; /* save current value of FS when line is read,
+ * to be used in deferred parsing
+ */
+static int *FIELDWIDTHS = NULL;
+
+NODE **fields_arr; /* array of pointers to the field nodes */
+int field0_valid; /* $(>0) has not been changed yet */
+int default_FS; /* TRUE when FS == " " */
+Regexp *FS_regexp = NULL;
+static NODE *Null_field = NULL;
+
+/*
+ * init_fields --- set up the fields array to start with
+ *
+ * Allocates a one-slot fields_arr whose $0 is a copy of the null string,
+ * snapshots the current FS value into save_FS, and builds the Null_field
+ * template that is copied whenever a field slot is blanked.
+ */
+
+void
+init_fields()
+{
+	NODE *field0;
+
+	emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
+
+	/* $0: copy of the null string, flagged as a scalar field, PERM cleared */
+	getnode(field0);
+	*field0 = *Nnull_string;
+	field0->flags = (field0->flags | (SCALAR|FIELD)) & ~PERM;
+	fields_arr[0] = field0;
+	parse_extent = field0->stptr;
+
+	save_FS = dupnode(FS_node->var_value);
+
+	/* template for empty fields: same, with every numeric flag cleared too */
+	getnode(Null_field);
+	*Null_field = *Nnull_string;
+	Null_field->flags = (Null_field->flags | (SCALAR|FIELD))
+				& ~(NUM|NUMBER|MAYBE_NUM|PERM);
+
+	field0_valid = TRUE;
+}
+
+/*
+ * grow_fields_arr --- acquire new fields as needed
+ *
+ * Resizes fields_arr to hold slots 0..num, fills every slot above the
+ * old nf_high_water with a fresh copy of Null_field, and records num as
+ * the new nf_high_water.
+ */
+
+static void
+grow_fields_arr(num)
+long num;
+{
+	register int slot;
+	register NODE *blank;
+
+	erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr");
+	slot = nf_high_water + 1;
+	while (slot <= num) {
+		getnode(blank);
+		*blank = *Null_field;
+		fields_arr[slot] = blank;
+		slot++;
+	}
+	nf_high_water = num;
+}
+
+/*
+ * set_field --- set the value of a particular field
+ *
+ * Points field number `num' at the `len' bytes starting at `str' (the
+ * bytes are not copied) and resets its flags to those of an unconverted
+ * string field.  fields_arr is grown first when `num' is beyond it.
+ */
+
+/*ARGSUSED*/
+static void
+set_field(num, str, len, dummy)
+long num;
+char *str;
+long len;
+NODE *dummy;	/* not used -- just to make interface same as set_element */
+{
+	register NODE *fld;
+
+	if (nf_high_water < num)
+		grow_fields_arr(num);
+
+	fld = fields_arr[num];
+	fld->stptr = str;
+	fld->stlen = len;
+	fld->flags = (STR|STRING|MAYBE_NUM|SCALAR|FIELD);
+}
+
+/*
+ * rebuild_record --- Someone assigned a value to $(something).
+ * Fix up $0 to be right.
+ *
+ * Builds a new $0 by joining fields 1..NF with the current OFS, installs
+ * it as fields_arr[0] and sets field0_valid to TRUE.  Fields whose data
+ * still pointed into the old $0 buffer are redirected into the new
+ * buffer before the old $0 is unref'ed.  NF must already be known
+ * (asserted below).
+ */
+
+static void
+rebuild_record()
+{
+ /*
+ * use explicit unsigned longs for lengths, in case
+ * a size_t isn't big enough.
+ */
+ register unsigned long tlen;
+ register unsigned long ofslen;
+ register NODE *tmp;
+ NODE *ofs;
+ char *ops;
+ register char *cops;
+ long i;
+ char *f0start, *f0end;
+
+ assert(NF != -1);
+
+ tlen = 0;
+ ofs = force_string(OFS_node->var_value);
+ ofslen = ofs->stlen;
+ for (i = NF; i > 0; i--) { /* pass 1: stringify each field and total the lengths */
+ tmp = fields_arr[i];
+ tmp = force_string(tmp);
+ tlen += tmp->stlen;
+ }
+ tlen += (NF - 1) * ofslen; /* plus one separator between each pair of fields */
+ if ((long) tlen < 0) /* presumably the NF == 0 case, where (NF - 1) * ofslen wraps */
+ tlen = 0;
+ emalloc(ops, char *, tlen + 2, "rebuild_record");
+ cops = ops;
+ ops[0] = '\0';
+ for (i = 1; i <= NF; i++) { /* pass 2: copy fields and separators into ops */
+ tmp = fields_arr[i];
+ /* copy field */
+ if (tmp->stlen == 1) /* one byte: plain store instead of memcpy */
+ *cops++ = tmp->stptr[0];
+ else if (tmp->stlen != 0) {
+ memcpy(cops, tmp->stptr, tmp->stlen);
+ cops += tmp->stlen;
+ }
+ /* copy OFS */
+ if (i != NF) { /* no separator after the last field */
+ if (ofslen == 1)
+ *cops++ = ofs->stptr[0];
+ else if (ofslen != 0) {
+ memcpy(cops, ofs->stptr, ofslen);
+ cops += ofslen;
+ }
+ }
+ }
+ tmp = make_str_node(ops, tlen, ALREADY_MALLOCED); /* ops is handed over, not copied -- per the flag name; confirm */
+
+ /*
+ * Since we are about to unref fields_arr[0], we want to find
+ * any fields that still point into it, and have them point
+ * into the new field zero.
+ */
+ f0start = fields_arr[0]->stptr;
+ f0end = fields_arr[0]->stptr + fields_arr[0]->stlen;
+ for (cops = ops, i = 1; i <= NF; i++) { /* cops tracks where field i starts in the new $0 */
+ char *field_data = fields_arr[i]->stptr;
+
+ if (fields_arr[i]->stlen > 0
+ && f0start <= field_data && field_data < f0end)
+ fields_arr[i]->stptr = cops;
+
+ cops += fields_arr[i]->stlen + ofslen; /* skip this field and the separator after it */
+ }
+
+ unref(fields_arr[0]);
+
+ fields_arr[0] = tmp;
+ field0_valid = TRUE;
+}
+
+/*
+ * set_record:
+ * setup $0, but defer parsing rest of line until reference is made to $(>0)
+ * or to NF. At that point, parse only as much as necessary.
+ *
+ * NF is set to -1 to mean "not parsed yet" and every field slot filled
+ * by the previous parse is replaced with a fresh copy of Null_field.
+ * When freeold is true, a new $0 node referring to buf/cnt replaces the
+ * old one; otherwise the existing $0 node is kept.
+ */
+void
+set_record(buf, cnt, freeold)
+char *buf;		/* ignored if ! freeold */
+int cnt;		/* ignored if ! freeold */
+int freeold;
+{
+	register int fno;
+	NODE *fresh;
+
+	NF = -1;
+
+	/* discard everything parsed out of the previous record */
+	fno = 1;
+	while (fno <= parse_high_water) {
+		unref(fields_arr[fno]);
+		getnode(fresh);
+		*fresh = *Null_field;
+		fields_arr[fno] = fresh;
+		fno++;
+	}
+	parse_high_water = 0;
+
+	/*
+	 * $0 = $0 should resplit using the current value of FS, thus,
+	 * this is executed orthogonally to the value of freeold.
+	 */
+	if (resave_fs) {
+		resave_fs = FALSE;
+		unref(save_FS);
+		save_FS = dupnode(FS_node->var_value);
+	}
+
+	if (freeold) {
+		/* swap in a new $0 node that refers to the caller's buffer */
+		unref(fields_arr[0]);
+		getnode(fresh);
+		fresh->stptr = buf;
+		fresh->stlen = cnt;
+		fresh->stref = 1;
+		fresh->type = Node_val;
+		fresh->stfmt = -1;
+		fresh->flags = (STRING|STR|MAYBE_NUM|SCALAR|FIELD);
+		fields_arr[0] = fresh;
+	}
+
+	fields_arr[0]->flags |= MAYBE_NUM;
+	field0_valid = TRUE;
+}
+
+/*
+ * reset_record --- start over again with current $0
+ *
+ * Makes sure $0 has a string value, then calls set_record() with
+ * freeold FALSE so the existing $0 node is kept and only the parse
+ * state is reset.
+ */
+
+void
+reset_record()
+{
+	NODE *rec = fields_arr[0];
+
+	(void) force_string(rec);
+	set_record(rec->stptr, rec->stlen, FALSE);
+}
+
+/*
+ * set_NF --- handle what happens to $0 and fields when NF is changed
+ *
+ * Reads the new count from NF_node, grows fields_arr when needed, and
+ * blanks every field between the old parse_high_water and the new NF
+ * (in whichever direction the count moved).  $0 is marked stale so that
+ * it gets rebuilt on its next reference.  A negative NF is fatal.
+ */
+
+void
+set_NF()
+{
+	register int i;
+	NODE *n;
+
+	assert(NF != -1);
+
+	NF = (long) force_number(NF_node->var_value);
+
+	/*
+	 * A negative count would make the truncation loop below index
+	 * fields_arr[] beneath slot 0, and -1 doubles as the "record not
+	 * parsed yet" marker, so reject it outright.
+	 */
+	if (NF < 0)
+		fatal("NF set to negative value");
+
+	if (NF > nf_high_water)
+		grow_fields_arr(NF);
+	if (parse_high_water < NF) {
+		/* NF grew: blank the fields beyond what was parsed */
+		for (i = parse_high_water + 1; i <= NF; i++) {
+			unref(fields_arr[i]);
+			getnode(n);
+			*n = *Null_field;
+			fields_arr[i] = n;
+		}
+	} else if (parse_high_water > 0) {
+		/* NF shrank: drop the parsed fields past the new end */
+		for (i = NF + 1; i <= parse_high_water; i++) {
+			unref(fields_arr[i]);
+			getnode(n);
+			*n = *Null_field;
+			fields_arr[i] = n;
+		}
+		parse_high_water = NF;
+	}
+	field0_valid = FALSE;
+}
+
+/*
+ * re_parse_field --- parse fields using a regexp.
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a regular
+ * expression -- either user-defined or because RS=="" and FS==" "
+ *
+ * Fields are numbered from parse_high_water + 1, or from 1 when up_to
+ * is HUGE (the value do_split() passes).  Each field found is reported
+ * through (*set)(); *buf is advanced to where parsing stopped and the
+ * number of the last field set is returned.  RESTART() and REEND() are
+ * used below as the match start and end offsets relative to scan.
+ */
+static long
+re_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len; /* number of bytes to parse starting at *buf */
+NODE *fs; /* not referenced by this variant */
+Regexp *rp; /* separator pattern handed to research() */
+Setfunc set; /* routine to set the value of the parsed field */
+NODE *n; /* passed through unchanged to (*set)() */
+{
+ register char *scan = *buf;
+ register long nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ if (RS_is_null && default_FS) /* skip leading blanks, tabs and newlines */
+ while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
+ scan++;
+ field = scan;
+ while (scan < end
+ && research(rp, scan, 0, (end - scan), TRUE) != -1
+ && nf < up_to) {
+ if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
+ scan++; /* step one byte and search again; field start is kept */
+ if (scan == end) {
+ (*set)(++nf, field, (long)(scan - field), n);
+ up_to = nf; /* makes the leftover-text test after the loop fail */
+ break;
+ }
+ continue;
+ }
+ (*set)(++nf, field,
+ (long)(scan + RESTART(rp, scan) - field), n);
+ scan += REEND(rp, scan); /* resume just past the separator */
+ field = scan;
+ if (scan == end) /* FS at end of record */
+ (*set)(++nf, field, 0L, n);
+ }
+ if (nf != up_to && scan < end) { /* text after the last separator is one more field */
+ (*set)(++nf, scan, (long)(end - scan), n);
+ scan = end;
+ }
+ *buf = scan;
+ return (nf);
+}
+
+/*
+ * def_parse_field --- default field parsing.
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single space
+ * character.
+ *
+ * Fields are runs of bytes other than blank, tab and newline.  They are
+ * numbered from parse_high_water + 1, or from 1 when up_to is HUGE.
+ * Each field is reported through (*set)(); *buf is advanced to where
+ * parsing stopped and the number of the last field set is returned.
+ * The byte at (*buf)[len] is temporarily overwritten with a sentinel,
+ * so it must be writable.
+ */
+
+static long
+def_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len; /* number of bytes to parse starting at *buf */
+NODE *fs; /* only its length is examined here */
+Regexp *rp; /* not referenced by this variant */
+Setfunc set; /* routine to set the value of the parsed field */
+NODE *n; /* passed through unchanged to (*set)() */
+{
+ register char *scan = *buf;
+ register long nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+ char sav;
+
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ /*
+ * Nasty special case. If FS set to "", return whole record
+ * as first field. This is not worth a separate function.
+ */
+ if (fs->stlen == 0) {
+ (*set)(++nf, *buf, len, n);
+ *buf += len;
+ return nf;
+ }
+
+ /* before doing anything save the char at *end */
+ sav = *end;
+ /* because it will be destroyed now: */
+
+ *end = ' '; /* sentinel character */
+ for (; nf < up_to; scan++) { /* scan++ steps over the separator that ended the field */
+ /*
+ * special case: fs is single space, strip leading whitespace
+ */
+ while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
+ scan++;
+ if (scan >= end)
+ break;
+ field = scan;
+ while (*scan != ' ' && *scan != '\t' && *scan != '\n') /* no bounds test: the sentinel stops it */
+ scan++;
+ (*set)(++nf, field, (long)(scan - field), n);
+ if (scan == end)
+ break;
+ }
+
+ /* everything done, restore original char at *end */
+ *end = sav;
+
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * posix_def_parse_field --- default field parsing.
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single space
+ * character. The only difference between this and def_parse_field()
+ * is that this one does not allow newlines to separate fields.
+ *
+ * Fields are runs of bytes other than blank and tab.  They are numbered
+ * from parse_high_water + 1, or from 1 when up_to is HUGE.  Each field
+ * is reported through (*set)(); *buf is advanced to where parsing
+ * stopped and the number of the last field set is returned.  The byte
+ * at (*buf)[len] is temporarily overwritten with a sentinel, so it must
+ * be writable.
+ */
+
+static long
+posix_def_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len; /* number of bytes to parse starting at *buf */
+NODE *fs; /* only its length is examined here */
+Regexp *rp; /* not referenced by this variant */
+Setfunc set; /* routine to set the value of the parsed field */
+NODE *n; /* passed through unchanged to (*set)() */
+{
+ register char *scan = *buf;
+ register long nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+ char sav;
+
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ /*
+ * Nasty special case. If FS set to "", return whole record
+ * as first field. This is not worth a separate function.
+ */
+ if (fs->stlen == 0) {
+ (*set)(++nf, *buf, len, n);
+ *buf += len;
+ return nf;
+ }
+
+ /* before doing anything save the char at *end */
+ sav = *end;
+ /* because it will be destroyed now: */
+
+ *end = ' '; /* sentinel character */
+ for (; nf < up_to; scan++) { /* scan++ steps over the separator that ended the field */
+ /*
+ * special case: fs is single space, strip leading whitespace
+ */
+ while (scan < end && (*scan == ' ' || *scan == '\t'))
+ scan++;
+ if (scan >= end)
+ break;
+ field = scan;
+ while (*scan != ' ' && *scan != '\t') /* no bounds test: the sentinel stops it */
+ scan++;
+ (*set)(++nf, field, (long)(scan - field), n);
+ if (scan == end)
+ break;
+ }
+
+ /* everything done, restore original char at *end */
+ *end = sav;
+
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * null_parse_field --- each character is a separate field
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is the null string.
+ *
+ * Every byte of the input becomes a one-byte field.  Numbering starts
+ * after parse_high_water, or at 1 when up_to is HUGE.  *buf is advanced
+ * past the bytes consumed and the number of the last field set is
+ * returned.
+ */
+static long
+null_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to;	/* parse only up to this field number */
+char **buf;	/* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+Setfunc set;	/* routine to set the value of the parsed field */
+NODE *n;
+{
+	register long count;
+	register char *cur = *buf;
+	register char *limit = cur + len;
+
+	count = (up_to == HUGE) ? 0 : parse_high_water;
+	if (len == 0)
+		return count;
+
+	while (count < up_to && cur < limit) {
+		count++;
+		(*set)(count, cur, 1L, n);
+		cur++;
+	}
+
+	*buf = cur;
+	return count;
+}
+
+/*
+ * sc_parse_field --- single character field separator
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single character
+ * other than space.
+ *
+ * Fields are numbered from parse_high_water + 1, or from 1 when up_to
+ * is HUGE.  Each field is reported through (*set)(); *buf is advanced
+ * to where parsing stopped and the number of the last field set is
+ * returned.  The byte at (*buf)[len] is temporarily overwritten with a
+ * sentinel, so it must be writable.
+ */
+static long
+sc_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to;	/* parse only up to this field number */
+char **buf;	/* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+Setfunc set;	/* routine to set the value of the parsed field */
+NODE *n;
+{
+	register char *scan = *buf;
+	register char fschar;
+	register long nf = parse_high_water;
+	register char *field;
+	register char *end = scan + len;
+	int onecase;
+	char sav;
+
+	if (up_to == HUGE)
+		nf = 0;
+	if (len == 0)
+		return nf;
+
+	if (RS_is_null && fs->stlen == 0)
+		fschar = '\n';
+	else
+		fschar = fs->stptr[0];
+
+	/*
+	 * Go through unsigned char for isalpha() and for the casetable[]
+	 * index: plain char may be signed, and a byte >= 0x80 would
+	 * otherwise be an invalid <ctype.h> argument or a negative index.
+	 */
+	onecase = (IGNORECASE && isalpha((unsigned char) fschar));
+	if (onecase)
+		fschar = casetable[(unsigned char) fschar];
+
+	/* before doing anything save the char at *end */
+	sav = *end;
+	/* because it will be destroyed now: */
+	*end = fschar;	/* sentinel character */
+
+	for (; nf < up_to;) {
+		field = scan;
+		/* no bounds test needed: the sentinel at *end stops the scan */
+		if (onecase) {
+			while (casetable[(unsigned char) *scan] != fschar)
+				scan++;
+		} else {
+			while (*scan != fschar)
+				scan++;
+		}
+		(*set)(++nf, field, (long)(scan - field), n);
+		if (scan == end)
+			break;
+		scan++;		/* step over the separator */
+		if (scan == end) {	/* FS at end of record */
+			(*set)(++nf, field, 0L, n);
+			break;
+		}
+	}
+
+	/* everything done, restore original char at *end */
+	*end = sav;
+
+	*buf = scan;
+	return nf;
+}
+
+/*
+ * fw_parse_field --- field parsing using FIELDWIDTHS spec
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when fields have fixed
+ * widths.
+ *
+ * Field k takes FIELDWIDTHS[k] bytes, clipped to what is left of the
+ * input; a width of -1 ends the list.  Numbering starts after
+ * parse_high_water, or at 1 when up_to is HUGE.  On return *buf is the
+ * end of the input if the -1 terminator was reached, otherwise the
+ * point where parsing stopped.  Returns the number of the last field set.
+ */
+static long
+fw_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to;	/* parse only up to this field number */
+char **buf;	/* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+Setfunc set;	/* routine to set the value of the parsed field */
+NODE *n;
+{
+	register char *cur = *buf;
+	register long count = parse_high_water;
+	register char *limit = cur + len;
+
+	if (up_to == HUGE)
+		count = 0;
+	if (len == 0)
+		return count;
+
+	/* from here on `len' holds the width of the field being cut */
+	while (count < up_to) {
+		len = FIELDWIDTHS[count+1];
+		if (len == -1)
+			break;
+		if (len > limit - cur)
+			len = limit - cur;
+		(*set)(++count, cur, (long) len, n);
+		cur += len;
+	}
+
+	*buf = (len == -1) ? limit : cur;
+	return count;
+}
+
+/*
+ * get_field --- return a particular $n
+ *
+ * Returns the address of the fields_arr[] slot for field `requested',
+ * parsing more of $0 on demand.  A non-NULL `assign' means the caller
+ * is about to assign to the field: for $0, *assign is set to
+ * reset_record; for any other field $0 is marked stale, and a field
+ * beyond the end of the record extends NF.  Reading a field beyond the
+ * end of the record returns &Null_field instead.  A request of HUGE-1
+ * parses the whole record so that NF gets set.
+ */
+
+NODE **
+get_field(requested, assign)
+register long requested;
+Func_ptr *assign; /* this field is on the LHS of an assign */
+{
+ /*
+ * if requesting whole line but some other field has been altered,
+ * then the whole line must be rebuilt
+ */
+ if (requested == 0) {
+ if (! field0_valid) {
+ /* first, parse remainder of input record */
+ if (NF == -1) {
+ NF = (*parse_field)(HUGE-1, &parse_extent,
+ fields_arr[0]->stlen -
+ (parse_extent - fields_arr[0]->stptr),
+ save_FS, FS_regexp, set_field,
+ (NODE *) NULL);
+ parse_high_water = NF;
+ }
+ rebuild_record();
+ }
+ if (assign != NULL)
+ *assign = reset_record; /* hands reset_record back to the caller via *assign */
+ return &fields_arr[0];
+ }
+
+ /* assert(requested > 0); */
+
+ if (assign != NULL)
+ field0_valid = FALSE; /* $0 needs reconstruction */
+
+ if (requested <= parse_high_water) /* already parsed this field */
+ return &fields_arr[requested];
+
+ if (NF == -1) { /* have not yet parsed to end of record */
+ /*
+ * parse up to requested fields, calling set_field() for each,
+ * saving in parse_extent the point where the parse left off
+ */
+ if (parse_high_water == 0) /* starting at the beginning */
+ parse_extent = fields_arr[0]->stptr;
+ parse_high_water = (*parse_field)(requested, &parse_extent,
+ fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr),
+ save_FS, FS_regexp, set_field, (NODE *) NULL);
+
+ /*
+ * if we reached the end of the record, set NF to the number of
+ * fields so far. Note that requested might actually refer to
+ * a field that is beyond the end of the record, but we won't
+ * set NF to that value at this point, since this is only a
+ * reference to the field and NF only gets set if the field
+ * is assigned to -- this case is handled below
+ */
+ if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
+ NF = parse_high_water;
+ if (requested == HUGE-1) /* HUGE-1 means set NF */
+ requested = parse_high_water;
+ }
+ if (parse_high_water < requested) { /* requested beyond end of record */
+ if (assign != NULL) { /* expand record */
+ if (requested > nf_high_water)
+ grow_fields_arr(requested);
+
+ NF = requested;
+ parse_high_water = requested;
+ } else
+ return &Null_field; /* read-only reference: NF is left alone */
+ }
+
+ return &fields_arr[requested];
+}
+
+/*
+ * set_element --- set an array element, used by do_split()
+ *
+ * Stores a new string node made from the `len' bytes at `s' in array
+ * `n' under the numeric subscript `num', flagged MAYBE_NUM.
+ */
+
+static void
+set_element(num, s, len, n)
+long num;
+char *s;
+long len;
+NODE *n;
+{
+	register NODE *elem;
+	NODE **slot;
+
+	elem = make_string(s, len);
+	elem->flags |= MAYBE_NUM;
+
+	slot = assoc_lookup(n, tmp_number((AWKNUM) (num)));
+	*slot = elem;
+}
+
+/*
+ * do_split --- implement split(), semantics are same as for field splitting
+ *
+ * tree->lnode is evaluated as the string to split, tree->rnode->lnode
+ * is the target array (cleared first), and tree->rnode->rnode->lnode,
+ * when present, is the separator.  Returns a temporary number node
+ * holding the value returned by the chosen parse routine.
+ *
+ * NOTE(review): sep is set to NULL when there is no third argument, yet
+ * it is dereferenced unconditionally below -- presumably the parser
+ * always supplies a third argument; confirm against the grammar.
+ */
+
+NODE *
+do_split(tree)
+NODE *tree;
+{
+ NODE *src, *arr, *sep, *tmp;
+ NODE *fs;
+ char *s;
+ long (*parseit) P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+ Regexp *rp = NULL;
+
+ /*
+ * do dupnode(), to avoid problems like
+ * x = split(a[1], a, "blah")
+ * since we assoc_clear the array. gack.
+ * this also gives us complete call by value semantics.
+ */
+ tmp = tree_eval(tree->lnode);
+ src = dupnode(tmp);
+ free_temp(tmp);
+
+ arr = tree->rnode->lnode;
+ if (tree->rnode->rnode != NULL)
+ sep = tree->rnode->rnode->lnode; /* 3rd arg */
+ else
+ sep = NULL;
+
+ (void) force_string(src);
+
+ if (arr->type == Node_param_list) /* function parameter: use the node found in stack_ptr[] */
+ arr = stack_ptr[arr->param_cnt];
+ if (arr->type != Node_var && arr->type != Node_var_array)
+ fatal("second argument of split is not an array");
+ arr->type = Node_var_array;
+ assoc_clear(arr);
+
+ if (sep->re_flags & FS_DFLT) { /* flagged default: split exactly as fields are split now */
+ parseit = parse_field;
+ fs = force_string(FS_node->var_value);
+ rp = FS_regexp;
+ } else {
+ tmp = force_string(tree_eval(sep->re_exp));
+ if (tmp->stlen == 0) /* empty separator: one element per character */
+ parseit = null_parse_field;
+ else if (tmp->stlen == 1 && (sep->re_flags & CONST) == 0) {
+ if (tmp->stptr[0] == ' ') {
+ if (do_posix)
+ parseit = posix_def_parse_field;
+ else
+ parseit = def_parse_field;
+ } else
+ parseit = sc_parse_field;
+ } else { /* anything else is used as a regexp */
+ parseit = re_parse_field;
+ rp = re_update(sep);
+ }
+ fs = tmp;
+ }
+
+ s = src->stptr;
+ tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int) src->stlen,
+ fs, rp, set_element, arr)); /* HUGE: parse routines number elements from 1 */
+ unref(src);
+ free_temp(sep);
+ return tmp;
+}
+
+/*
+ * set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS
+ *
+ * Switches field splitting to fw_parse_field and converts the value of
+ * the FIELDWIDTHS variable into the int array FIELDWIDTHS[]: slot 0 is
+ * 0, slots 1.. hold the widths in order, and -1 marks the end.  Does
+ * nothing in traditional mode apart from the one-time lint warning.
+ */
+
+void
+set_FIELDWIDTHS()
+{
+ register char *scan;
+ char *end;
+ register int i;
+ static int fw_alloc = 1; /* number of ints allocated in FIELDWIDTHS[] */
+ static int warned = FALSE; /* lint warning is issued only once */
+ extern double strtod();
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ warning("use of FIELDWIDTHS is a gawk extension");
+ }
+ if (do_traditional) /* quick and dirty, does the trick */
+ return;
+
+ /*
+ * If changing the way fields are split, obey least-surprise
+ * semantics, and force $0 to be split totally.
+ */
+ if (fields_arr != NULL)
+ (void) get_field(HUGE - 1, 0);
+
+ parse_field = fw_parse_field;
+ scan = force_string(FIELDWIDTHS_node->var_value)->stptr;
+ end = scan + 1; /* placeholder only: strtod() stores into end before it is read */
+ if (FIELDWIDTHS == NULL)
+ emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
+ FIELDWIDTHS[0] = 0;
+ for (i = 1; ; i++) {
+ if (i >= fw_alloc) { /* double the array when slot i does not exist yet */
+ fw_alloc *= 2;
+ erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
+ }
+ FIELDWIDTHS[i] = (int) strtod(scan, &end);
+ if (end == scan) /* nothing converted: no more widths */
+ break;
+ scan = end;
+ }
+ FIELDWIDTHS[i] = -1; /* overwrite the failed conversion with the terminator */
+}
+
+/*
+ * set_FS_if_not_FIELDWIDTHS --- call set_FS() unless fields are
+ * currently being split by FIELDWIDTHS
+ */
+void
+set_FS_if_not_FIELDWIDTHS()
+{
+	if (parse_field == fw_parse_field)
+		return;
+	set_FS();
+}
+
+/*
+ * set_FS --- handle things when FS is assigned to
+ *
+ * Picks the field-splitting routine (parse_field) and, where needed,
+ * compiles FS_regexp, from the current values of FS and RS:
+ *	- FS == "" outside traditional mode: null_parse_field
+ *	- FS longer than one character: re_parse_field, FS is the regexp
+ *	- RS is null: sc_parse_field, or a regexp that also splits on newline
+ *	- otherwise: def/posix_def_parse_field, or sc_parse_field for a
+ *	  single non-space character unless IGNORECASE calls for a regexp
+ * Returns early, after forcing a full split of $0, when neither FS nor
+ * RS has changed since the previous call.  Sets resave_fs so that
+ * set_record() refreshes save_FS.
+ */
+
+void
+set_FS()
+{
+ char buf[10]; /* synthesized bracket expression; the longest stored is 6 chars plus NUL */
+ NODE *fs;
+ static NODE *save_fs = NULL; /* FS value seen on the previous call */
+ static NODE *save_rs = NULL; /* RS value seen on the previous call */
+
+ /*
+ * If changing the way fields are split, obey least-surprise
+ * semantics, and force $0 to be split totally.
+ */
+ if (fields_arr != NULL)
+ (void) get_field(HUGE - 1, 0);
+
+ if (save_fs && cmp_nodes(FS_node->var_value, save_fs) == 0
+ && save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0)
+ return;
+ unref(save_fs);
+ save_fs = dupnode(FS_node->var_value);
+ unref(save_rs);
+ save_rs = dupnode(RS_node->var_value);
+ resave_fs = TRUE;
+ buf[0] = '\0'; /* empty buf means no synthesized regexp */
+ default_FS = FALSE;
+ if (FS_regexp) {
+ refree(FS_regexp);
+ FS_regexp = NULL;
+ }
+ fs = force_string(FS_node->var_value);
+ if (! do_traditional && fs->stlen == 0)
+ parse_field = null_parse_field;
+ else if (fs->stlen > 1)
+ parse_field = re_parse_field;
+ else if (RS_is_null) {
+ parse_field = sc_parse_field;
+ if (fs->stlen == 1) {
+ if (fs->stptr[0] == ' ') {
+ default_FS = TRUE;
+ strcpy(buf, "[ \t\n]+");
+ } else if (fs->stptr[0] != '\n')
+ sprintf(buf, "[%c\n]", fs->stptr[0]); /* split on the FS character or newline */
+ }
+ } else {
+ if (do_posix)
+ parse_field = posix_def_parse_field;
+ else
+ parse_field = def_parse_field;
+ if (fs->stptr[0] == ' ' && fs->stlen == 1)
+ default_FS = TRUE;
+ else if (fs->stptr[0] != ' ' && fs->stlen == 1) {
+ if (! IGNORECASE)
+ parse_field = sc_parse_field;
+ else if (fs->stptr[0] == '\\')
+ /* yet another special case */
+ strcpy(buf, "[\\\\]");
+ else
+ sprintf(buf, "[%c]", fs->stptr[0]);
+ }
+ }
+ if (buf[0] != '\0') { /* a synthesized regexp overrides the routine chosen above */
+ FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, TRUE);
+ parse_field = re_parse_field;
+ } else if (parse_field == re_parse_field) { /* multi-character FS: FS itself is the regexp */
+ FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, TRUE);
+ } else
+ FS_regexp = NULL;
+}
+
+/*
+ * using_fieldwidths --- are fields being split by FIELDWIDTHS?
+ *
+ * Returns nonzero when the active parse routine is fw_parse_field,
+ * zero otherwise.
+ */
+
+int
+using_fieldwidths()
+{
+	int fixed_width = (parse_field == fw_parse_field);
+
+	return fixed_width;
+}
+
diff --git a/contrib/awk/gawkmisc.c b/contrib/awk/gawkmisc.c
new file mode 100644
index 0000000..0707971
--- /dev/null
+++ b/contrib/awk/gawkmisc.c
@@ -0,0 +1,63 @@
+/*
+ * gawkmisc.c --- miscellaneous gawk routines that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 - 97 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+
+/* some old compilers don't grok #elif. sigh */
+
+#if defined(MSDOS) || defined(OS2) || defined(WIN32)
+#include "gawkmisc.pc"
+#else
+#if defined(VMS)
+#include "vms/gawkmisc.vms"
+#else
+#if defined(atarist)
+#include "atari/gawkmisc.atr"
+#else
+#include "posix/gawkmisc.c"
+#endif
+#endif
+#endif
+
+/* xmalloc --- provide this so that other GNU library routines work */
+
+#if __STDC__
+typedef void *pointer;
+#else
+typedef char *pointer;
+#endif
+
+extern pointer xmalloc P((size_t bytes)); /* get rid of gcc warning */
+
+pointer
+xmalloc(bytes)
+size_t bytes;
+{
+	pointer mem;
+
+	/*
+	 * Allocate through emalloc(), tagged "xmalloc"; presumably the
+	 * macro reports an allocation failure itself -- confirm in awk.h.
+	 */
+	emalloc(mem, pointer, bytes, "xmalloc");
+	return mem;
+}
diff --git a/contrib/awk/getopt.c b/contrib/awk/getopt.c
new file mode 100644
index 0000000..eac576b
--- /dev/null
+++ b/contrib/awk/getopt.c
@@ -0,0 +1,1000 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97
+ Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+#define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#if !defined (__STDC__) || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+#include <stdlib.h>
+#include <unistd.h>
+#endif /* GNU C library. */
+
+#ifdef VMS
+#include <unixlib.h>
+#if HAVE_STRING_H - 0
+#include <string.h>
+#endif
+#endif
+
+#if defined (WIN32) && !defined (__CYGWIN32__)
+/* It's not Unix, really. See? Capital letters. */
+#include <windows.h>
+#define getpid() GetCurrentProcessId()
+#endif
+
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.
+ When compiling libc, the _ macro is predefined. */
+#ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+#else
+# define _(msgid) (msgid)
+#endif
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg = NULL;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* 1003.2 says this must be 1 before any call. */
+int optind = 1;
+
+/* Formerly, initialization of getopt depended on optind==0, which
+ causes problems with re-calling getopt as programs generally don't
+ know that. */
+
+int __getopt_initialized = 0;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
+
+static enum
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable. */
+static char *posixly_correct;
+
+#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+ because there are many ways it can cause trouble.
+ On some systems, it contains special magic macros that don't work
+ in GCC. */
+#include <string.h>
+#define my_index strchr
+#else
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+char *getenv ();
+
+static char *
+my_index (str, chr)
+ const char *str;
+ int chr;
+{
+ while (*str)
+ {
+ if (*str == chr)
+ return (char *) str;
+ str++;
+ }
+ return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+ If not using GCC, it is ok not to declare it. */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+ That was relevant to code that was here before. */
+#if !defined (__STDC__) || !__STDC__
+/* gcc with -traditional declares the built-in strlen to return int,
+ and has done so at least since version 2.4.5. -- rms. */
+extern int strlen (const char *);
+#endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+#ifdef _LIBC
+/* Bash 2.0 gives us an environment variable containing flags
+ indicating ARGV elements that should not be considered arguments. */
+
+static const char *nonoption_flags;
+static int nonoption_flags_len;
+
+static int original_argc;
+static char *const *original_argv;
+
+/* To make sure the environment variable bash 2.0 puts in the environment
+ is valid for the getopt call, we must make sure that the ARGV passed
+ to getopt is the one passed to the process. */
+static void store_args (int argc, char *const *argv) __attribute__ ((unused));
+static void
+store_args (int argc, char *const *argv)
+{
+ /* XXX This is not a good solution. We should rather copy the args so
+ that we can compare them later. But we must not use malloc(3). */
+ original_argc = argc;
+ original_argv = argv;
+}
+text_set_element (__libc_subinit, store_args);
+#endif
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+#if defined (__STDC__) && __STDC__
+static void exchange (char **);
+#endif
+
+static void
+exchange (argv)
+ char **argv;
+{
+ int bottom = first_nonopt;
+ int middle = last_nonopt;
+ int top = optind;
+ char *tem;
+
+ /* Exchange the shorter segment with the far end of the longer segment.
+ That puts the shorter segment into the right place.
+ It leaves the longer segment in the right place overall,
+ but it consists of two parts that need to be swapped next. */
+
+ while (top > middle && middle > bottom)
+ {
+ if (top - middle > middle - bottom)
+ {
+ /* Bottom segment is the short one. */
+ int len = middle - bottom;
+ register int i;
+
+ /* Swap it with the top part of the top segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[top - (middle - bottom) + i];
+ argv[top - (middle - bottom) + i] = tem;
+ }
+ /* Exclude the moved bottom segment from further swapping. */
+ top -= len;
+ }
+ else
+ {
+ /* Top segment is the short one. */
+ int len = top - middle;
+ register int i;
+
+ /* Swap it with the bottom part of the bottom segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[middle + i];
+ argv[middle + i] = tem;
+ }
+ /* Exclude the moved top segment from further swapping. */
+ bottom += len;
+ }
+ }
+
+ /* Update records for the slots the non-options now occupy. */
+
+ first_nonopt += (optind - last_nonopt);
+ last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made. */
+
+#if defined (__STDC__) && __STDC__
+static const char *_getopt_initialize (int, char *const *, const char *);
+#endif
+static const char *
+_getopt_initialize (argc, argv, optstring)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+{
+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ posixly_correct = getenv ("POSIXLY_CORRECT");
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (posixly_correct != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+
+#ifdef _LIBC
+ if (posixly_correct == NULL
+ && argc == original_argc && argv == original_argv)
+ {
+ /* Bash 2.0 puts a special variable in the environment for each
+ command it runs, specifying which ARGV elements are the results of
+ file name wildcard expansion and therefore should not be
+ considered as options. */
+ char var[100];
+ sprintf (var, "_%d_GNU_nonoption_argv_flags_", getpid ());
+ nonoption_flags = getenv (var);
+ if (nonoption_flags == NULL)
+ nonoption_flags_len = 0;
+ else
+ nonoption_flags_len = strlen (nonoption_flags);
+ }
+ else
+ nonoption_flags_len = 0;
+#endif
+
+ return optstring;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns -1.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+ const struct option *longopts;
+ int *longind;
+ int long_only;
+{
+ optarg = NULL;
+
+ if (!__getopt_initialized || optind == 0)
+ {
+ optstring = _getopt_initialize (argc, argv, optstring);
+ optind = 1; /* Don't scan ARGV[0], the program name. */
+ __getopt_initialized = 1;
+ }
+
+ /* Test whether ARGV[optind] points to a non-option argument.
+ Either it does not have option syntax, or there is an environment flag
+ from the shell indicating it is not an option. The latter information
+ is only used when this code is used in the GNU libc. */
+#ifdef _LIBC
+#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \
+ || (optind < nonoption_flags_len \
+ && nonoption_flags[optind] == '1'))
+#else
+#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#endif
+
+ if (nextchar == NULL || *nextchar == '\0')
+ {
+ /* Advance to the next ARGV-element. */
+
+ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+ moved back by the user (who may also have changed the arguments). */
+ if (last_nonopt > optind)
+ last_nonopt = optind;
+ if (first_nonopt > optind)
+ first_nonopt = optind;
+
+ if (ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (last_nonopt != optind)
+ first_nonopt = optind;
+
+ /* Skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (optind < argc && NONOPTION_P)
+ optind++;
+ last_nonopt = optind;
+ }
+
+ /* The special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (optind != argc && !strcmp (argv[optind], "--"))
+ {
+ optind++;
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (first_nonopt == last_nonopt)
+ first_nonopt = optind;
+ last_nonopt = argc;
+
+ optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (first_nonopt != last_nonopt)
+ optind = first_nonopt;
+ return -1;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if (NONOPTION_P)
+ {
+ if (ordering == REQUIRE_ORDER)
+ return -1;
+ optarg = argv[optind++];
+ return 1;
+ }
+
+ /* We have found another option-ARGV-element.
+ Skip the initial punctuation. */
+
+ nextchar = (argv[optind] + 1
+ + (longopts != NULL && argv[optind][1] == '-'));
+ }
+
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
+ if (longopts != NULL
+ && (argv[optind][1] == '-'
+ || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = -1;
+ int option_index;
+
+ for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, nextchar, nameend - nextchar))
+ {
+ if ((unsigned int) (nameend - nextchar)
+ == (unsigned int) strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[optind]);
+ nextchar += strlen (nextchar);
+ optind++;
+ optopt = 0;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ optind++;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = nameend + 1;
+ else
+ {
+ if (opterr)
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ _("%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ _("%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[optind - 1][0], pfound->name);
+
+ nextchar += strlen (nextchar);
+
+ optopt = pfound->val;
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ optopt = pfound->val;
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[optind][1] == '-'
+ || my_index (optstring, *nextchar) == NULL)
+ {
+ if (opterr)
+ {
+ if (argv[optind][1] == '-')
+ /* --option */
+ fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+ argv[0], nextchar);
+ else
+ /* +option or -option */
+ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[optind][0], nextchar);
+ }
+ nextchar = (char *) "";
+ optind++;
+ optopt = 0;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next short option-character. */
+
+ {
+ char c = *nextchar++;
+ char *temp = my_index (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*nextchar == '\0')
+ ++optind;
+
+ if (temp == NULL || c == ':')
+ {
+ if (opterr)
+ {
+ if (posixly_correct)
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, _("%s: illegal option -- %c\n"),
+ argv[0], c);
+ else
+ fprintf (stderr, _("%s: invalid option -- %c\n"),
+ argv[0], c);
+ }
+ optopt = c;
+ return '?';
+ }
+ /* Convenience. Treat POSIX -W foo same as long option --foo */
+ if (temp[0] == 'W' && temp[1] == ';')
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = 0;
+ int option_index;
+
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ {
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+ }
+ optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ return c;
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+
+ /* optarg is now the argument, see if it's in the
+ table of longopts. */
+
+ for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, nextchar, nameend - nextchar))
+ {
+ if ((unsigned int) (nameend - nextchar) == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], argv[optind]);
+ nextchar += strlen (nextchar);
+ optind++;
+ return '?';
+ }
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = nameend + 1;
+ else
+ {
+ if (opterr)
+ fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ nextchar = NULL;
+ return 'W'; /* Let the application handle it. */
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ optind++;
+ }
+ else
+ optarg = NULL;
+ nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ {
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+ }
+ optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+ nextchar = NULL;
+ }
+ }
+ return c;
+ }
+}
+
+int
+getopt (argc, argv, optstring)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+{
+ return _getopt_internal (argc, argv, optstring,
+ (const struct option *) 0,
+ (int *) 0,
+ 0);
+}
+
+#endif /* Not ELIDE_CODE. */
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+int
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ int c;
+ int digit_optind = 0;
+
+ while (1)
+ {
+ int this_option_optind = optind ? optind : 1;
+
+ c = getopt (argc, argv, "abc:d:0123456789");
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf ("digits occur in two different argv-elements.\n");
+ digit_optind = this_option_optind;
+ printf ("option %c\n", c);
+ break;
+
+ case 'a':
+ printf ("option a\n");
+ break;
+
+ case 'b':
+ printf ("option b\n");
+ break;
+
+ case 'c':
+ printf ("option c with value `%s'\n", optarg);
+ break;
+
+ case '?':
+ break;
+
+ default:
+ printf ("?? getopt returned character code 0%o ??\n", c);
+ }
+ }
+
+ if (optind < argc)
+ {
+ printf ("non-option ARGV-elements: ");
+ while (optind < argc)
+ printf ("%s ", argv[optind++]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
diff --git a/contrib/awk/getopt.h b/contrib/awk/getopt.h
new file mode 100644
index 0000000..7dad11b
--- /dev/null
+++ b/contrib/awk/getopt.h
@@ -0,0 +1,133 @@
+/* Declarations for getopt.
+ Copyright (C) 1989,90,91,92,93,94,96,97 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+struct option
+{
+#if defined (__STDC__) && __STDC__
+ const char *name;
+#else
+ char *name;
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg;
+ int *flag;
+ int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
+
+#if defined (__STDC__) && __STDC__
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */
diff --git a/contrib/awk/getopt1.c b/contrib/awk/getopt1.c
new file mode 100644
index 0000000..8347bb1
--- /dev/null
+++ b/contrib/awk/getopt1.c
@@ -0,0 +1,189 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "getopt.h"
+
+#if !defined (__STDC__) || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* getopt_long --- entry point that recognizes long options introduced
+   by `--'.  All the work is done by _getopt_internal, called here with
+   its final (long_only) argument set to 0.  */
+
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 0;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+			   long_only);
+}
+
+/* getopt_long_only --- same as getopt_long, except that a single '-'
+   may introduce a long option as well as '--'.  An option starting with
+   '-' (not '--') that matches no long option but does match a short
+   option is parsed as a short option instead.  Implemented by calling
+   _getopt_internal with its final (long_only) argument set to 1.  */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 1;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+			   long_only);
+}
+
+
+#endif /* Not ELIDE_CODE. */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver (compiled only with -DTEST): parse ARGV with getopt_long
+   and print every option found, followed by the remaining non-option
+   arguments.  Returns 0.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+      static struct option long_options[] =
+      {
+	{"add", 1, 0, 0},
+	{"append", 0, 0, 0},
+	{"delete", 1, 0, 0},
+	{"verbose", 0, 0, 0},
+	{"create", 0, 0, 0},
+	{"file", 1, 0, 0},
+	{0, 0, 0, 0}
+      };
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+		       long_options, &option_index);
+      if (c == -1)
+	break;
+
+      switch (c)
+	{
+	case 0:
+	  /* A long option whose `flag' and `val' are both zero.  */
+	  printf ("option %s", long_options[option_index].name);
+	  if (optarg)
+	    printf (" with arg %s", optarg);
+	  printf ("\n");
+	  break;
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	  /* Complain when digit options come from different ARGV elements.  */
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case 'd':
+	  printf ("option d with value `%s'\n", optarg);
+	  break;
+
+	case '?':
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* Return rather than call exit (): <stdlib.h> is only included when
+     __GNU_LIBRARY__ is defined, so exit () may have no declaration.  */
+  return 0;
+}
+
+#endif /* TEST */
diff --git a/contrib/awk/io.c b/contrib/awk/io.c
new file mode 100644
index 0000000..74d9a8d
--- /dev/null
+++ b/contrib/awk/io.c
@@ -0,0 +1,1941 @@
+/*
+ * io.c --- routines for dealing with input and output and records
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+#undef HAVE_MMAP /* for now, probably forever */
+
+#ifdef HAVE_SYS_PARAM_H
+#undef RE_DUP_MAX /* avoid spurious conflict w/regex.h */
+#include <sys/param.h>
+#endif /* HAVE_SYS_PARAM_H */
+
+#ifdef HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif /* HAVE_SYS_WAIT_H */
+
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#ifndef MAP_FAILED
+#define MAP_FAILED ((caddr_t) -1)
+#endif /* ! defined (MAP_FAILED) */
+#endif /* HAVE_MMAP */
+
+#ifndef O_RDONLY
+#include <fcntl.h>
+#endif
+#ifndef O_ACCMODE
+#define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR)
+#endif
+
+#include <assert.h>
+
+#if ! defined(S_ISREG) && defined(S_IFREG)
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif
+
+#if ! defined(S_ISDIR) && defined(S_IFDIR)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#endif
+
+#ifndef ENFILE
+#define ENFILE EMFILE
+#endif
+
+#ifdef atarist
+#include <stddef.h>
+#endif
+
+#if defined(MSDOS) || defined(OS2) || defined(WIN32)
+#define PIPES_SIMULATED
+#endif
+
+static IOBUF *nextfile P((int skipping));
+static int inrec P((IOBUF *iop));
+static int iop_close P((IOBUF *iop));
+struct redirect *redirect P((NODE *tree, int *errflg));
+static void close_one P((void));
+static int close_redir P((struct redirect *rp, int exitwarn));
+#ifndef PIPES_SIMULATED
+static int wait_any P((int interesting));
+#endif
+static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
+static IOBUF *iop_open P((const char *file, const char *how, IOBUF *buf));
+static IOBUF *iop_alloc P((int fd, const char *name, IOBUF *buf));
+static int gawk_pclose P((struct redirect *rp));
+static int do_pathopen P((const char *file));
+static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode));
+#ifdef HAVE_MMAP
+static int mmap_get_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode));
+#endif /* HAVE_MMAP */
+static int str2mode P((const char *mode));
+static void spec_setup P((IOBUF *iop, int len, int allocate));
+static int specfdopen P((IOBUF *iop, const char *name, const char *mode));
+static int pidopen P((IOBUF *iop, const char *name, const char *mode));
+static int useropen P((IOBUF *iop, const char *name, const char *mode));
+
+#if defined (MSDOS) && !defined (__GO32__)
+#include "popen.h"
+#define popen(c, m) os_popen(c, m)
+#define pclose(f) os_pclose(f)
+#else
+#if defined (OS2) /* OS/2, but not family mode */
+#if defined (_MSC_VER)
+#define popen(c, m) _popen(c, m)
+#define pclose(f) _pclose(f)
+#endif
+#else
+extern FILE *popen();
+#endif
+#endif
+
+static struct redirect *red_head = NULL;
+static NODE *RS;
+static Regexp *RS_regexp;
+
+int RS_is_null;
+
+extern int output_is_tty;
+extern NODE *ARGC_node;
+extern NODE *ARGV_node;
+extern NODE *ARGIND_node;
+extern NODE *ERRNO_node;
+extern NODE **fields_arr;
+
+static jmp_buf filebuf; /* for do_nextfile() */
+
+/*
+ * do_nextfile --- implement gawk "nextfile" extension
+ *
+ * Closes the current input file through nextfile(TRUE) and then
+ * longjmp()s to the setjmp(filebuf) point set up in do_input(), so
+ * this function does not return to its caller.
+ */
+
+void
+do_nextfile()
+{
+ (void) nextfile(TRUE);
+ longjmp(filebuf, 1);
+}
+
+/*
+ * nextfile --- move to the next input data file
+ *
+ * If skipping is true, close the current file (if any) and return NULL.
+ * Otherwise return the current file while it has not hit EOF.  Once it
+ * has, close it and resume the walk over ARGV where the previous call
+ * stopped: empty elements are skipped, elements accepted by arg_assign()
+ * are not treated as files, and the first remaining element is opened.
+ * If no file operand has been seen at all, standard input ("-") is used.
+ * Returns NULL when there is nothing left to read; failure to open a
+ * file is fatal.
+ */
+
+static IOBUF *
+nextfile(skipping)
+int skipping;
+{
+	static long i = 1;		/* next ARGV index to examine */
+	static int files = 0;		/* number of file operands seen */
+	NODE *arg;
+	static IOBUF *curfile = NULL;
+	static IOBUF mybuf;		/* storage handed to iop_open() */
+	const char *fname;
+
+	if (skipping) {
+		if (curfile != NULL)
+			iop_close(curfile);
+		curfile = NULL;
+		return NULL;
+	}
+	if (curfile != NULL) {
+		if (curfile->cnt == EOF) {
+			(void) iop_close(curfile);
+			curfile = NULL;
+		} else
+			return curfile;
+	}
+	for (; i < (long) (ARGC_node->lnode->numbr); i++) {
+		arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i));
+		if (arg->stlen == 0)
+			continue;
+		arg->stptr[arg->stlen] = '\0';
+		if (! do_traditional) {
+			unref(ARGIND_node->var_value);
+			ARGIND_node->var_value = make_number((AWKNUM) i);
+		}
+		if (! arg_assign(arg->stptr)) {
+			files++;
+			fname = arg->stptr;
+			curfile = iop_open(fname, "r", &mybuf);
+			if (curfile == NULL)
+				goto give_up;
+			curfile->flag |= IOP_NOFREE_OBJ;
+			/* This is a kludge. */
+			unref(FILENAME_node->var_value);
+			FILENAME_node->var_value = dupnode(arg);
+			FNR = 0;
+			i++;
+			break;
+		}
+	}
+	if (files == 0) {
+		files++;
+		/* no args. -- use stdin */
+		/* FNR is init'ed to 0 */
+		/* release the old value first, as the file branch above does */
+		unref(FILENAME_node->var_value);
+		FILENAME_node->var_value = make_string("-", 1);
+		fname = "-";
+		curfile = iop_open(fname, "r", &mybuf);
+		if (curfile == NULL)
+			goto give_up;
+		curfile->flag |= IOP_NOFREE_OBJ;
+	}
+	return curfile;
+
+ give_up:
+	fatal("cannot open file `%s' for reading (%s)",
+		fname, strerror(errno));
+	/* NOTREACHED */
+	return 0;
+}
+
+/* set_FNR --- copy the awk-level FNR value into the internal counter */
+
+void
+set_FNR()
+{
+	NODE *val = FNR_node->var_value;
+
+	FNR = (long) val->numbr;
+}
+
+/* set_NR --- copy the awk-level NR value into the internal counter */
+
+void
+set_NR()
+{
+	NODE *val = NR_node->var_value;
+
+	NR = (long) val->numbr;
+}
+
+/*
+ * inrec --- read the next record from iop
+ *
+ * Returns 1 at end of input.  Otherwise bumps NR and FNR, installs the
+ * record with set_record() and returns 0.
+ */
+
+static int
+inrec(iop)
+IOBUF *iop;
+{
+	char *rec_start;
+	int len = iop->cnt;
+
+	/* only ask for more data if the IOBUF is not already at EOF */
+	if (len != EOF)
+		len = (*iop->getrec)(&rec_start, iop, RS->stptr[0],
+				RS_regexp, NULL);
+	if (len == EOF)
+		return 1;
+
+	NR++;
+	FNR++;
+	set_record(rec_start, len, TRUE);
+	return 0;
+}
+
+/*
+ * iop_close --- close an open IOP
+ *
+ * Returns 1 if close() on the descriptor failed (a warning is printed),
+ * 0 otherwise; a NULL iop is accepted and yields 0.  IOBUFs flagged
+ * IOP_IS_INTERNAL are only rewound for re-use.  Unless IOP_NO_FREE is
+ * set the buffer is released, and unless IOP_NOFREE_OBJ is set the
+ * IOBUF itself is freed as well.
+ */
+
+static int
+iop_close(iop)
+IOBUF *iop;
+{
+ int ret;
+
+ if (iop == NULL)
+ return 0;
+ errno = 0;
+
+#ifdef _CRAY
+ /* Work around bug in UNICOS popen */
+ /* NOTE: the `else' below binds to the IOP_IS_INTERNAL test that follows */
+ if (iop->fd < 3)
+ ret = 0;
+ else
+#endif
+ /* save these for re-use; don't free the storage */
+ if ((iop->flag & IOP_IS_INTERNAL) != 0) {
+ iop->off = iop->buf;
+ iop->end = iop->buf + strlen(iop->buf);
+ iop->cnt = 0;
+ iop->secsiz = 0;
+ return 0;
+ }
+
+ /* Don't close standard files or else crufty code elsewhere will lose */
+ if (iop->fd == fileno(stdin)
+ || iop->fd == fileno(stdout)
+ || iop->fd == fileno(stderr)
+ || (iop->flag & IOP_MMAPPED) != 0)
+ ret = 0;
+ else
+ ret = close(iop->fd);
+
+ if (ret == -1)
+ warning("close of fd %d (`%s') failed (%s)", iop->fd,
+ iop->name, strerror(errno));
+ if ((iop->flag & IOP_NO_FREE) == 0) {
+ /*
+ * Be careful -- $0 may still reference the buffer even though
+ * an explicit close is being done; in the future, maybe we
+ * can do this a bit better.
+ */
+ if (iop->buf) {
+ /* $0 points into this buffer: give it a private copy first */
+ if ((fields_arr[0]->stptr >= iop->buf)
+ && (fields_arr[0]->stptr < (iop->buf + iop->secsiz + iop->size))) {
+ NODE *t;
+
+ t = make_string(fields_arr[0]->stptr,
+ fields_arr[0]->stlen);
+ unref(fields_arr[0]);
+ fields_arr[0] = t;
+ reset_record();
+ }
+ if ((iop->flag & IOP_MMAPPED) == 0)
+ free(iop->buf);
+#ifdef HAVE_MMAP
+ else
+ (void) munmap(iop->buf, iop->size);
+#endif
+ }
+ if ((iop->flag & IOP_NOFREE_OBJ) == 0)
+ free((char *) iop);
+ }
+ return ret == -1 ? 1 : 0;
+}
+
+/*
+ * do_input --- the main input processing loop
+ *
+ * For each input file returned by nextfile(), read records with inrec()
+ * and run interpret(expression_value) on each one until inrec() reports
+ * end of input or interpret() returns zero.  The outer loop also stops
+ * once `exiting' is set.
+ */
+
+void
+do_input()
+{
+ IOBUF *iop;
+ extern int exiting;
+
+ /* do_nextfile() longjmp()s back here; the loop then picks up the next file */
+ (void) setjmp(filebuf); /* for `nextfile' */
+
+ while ((iop = nextfile(FALSE)) != NULL) {
+ if (inrec(iop) == 0)
+ while (interpret(expression_value) && inrec(iop) == 0)
+ continue;
+#ifdef C_ALLOCA
+ /* recover any space from C based alloca */
+ (void) alloca(0);
+#endif
+ if (exiting)
+ break;
+ }
+}
+
+/*
+ * redirect --- Redirection for printf and print commands
+ *
+ * Evaluates tree->subnode to obtain the target name, finds the matching
+ * entry on the red_head list (or creates one and puts it at the front),
+ * and opens the file or pipe if the entry is not currently open.
+ *
+ * Returns the struct redirect.  An entry flagged RED_EOF is returned
+ * as is, without reopening.  If the open fails for a reason other than
+ * running out of descriptors, errno is stored in *errflg (when errflg
+ * is not NULL); the failure is fatal for `>' and `>>' redirections and
+ * yields NULL for the other kinds.  A null target name, an unknown
+ * tree type and a failed popen()/gawk_popen() are fatal as well.
+ */
+
+struct redirect *
+redirect(tree, errflg)
+NODE *tree;
+int *errflg;
+{
+ register NODE *tmp;
+ register struct redirect *rp;
+ register char *str;
+ int tflag = 0;
+ int outflag = 0;
+ const char *direction = "to";
+ const char *mode;
+ int fd;
+ const char *what = NULL;
+
+ /* translate the node type into RED_* flags and a printable operator */
+ switch (tree->type) {
+ case Node_redirect_append:
+ tflag = RED_APPEND;
+ /* FALL THROUGH */
+ case Node_redirect_output:
+ outflag = (RED_FILE|RED_WRITE);
+ tflag |= outflag;
+ if (tree->type == Node_redirect_output)
+ what = ">";
+ else
+ what = ">>";
+ break;
+ case Node_redirect_pipe:
+ tflag = (RED_PIPE|RED_WRITE);
+ what = "|";
+ break;
+ case Node_redirect_pipein:
+ tflag = (RED_PIPE|RED_READ);
+ what = "|";
+ break;
+ case Node_redirect_input:
+ tflag = (RED_FILE|RED_READ);
+ what = "<";
+ break;
+ default:
+ fatal("invalid tree type %d in redirect()", tree->type);
+ break;
+ }
+ tmp = tree_eval(tree->subnode);
+ if (do_lint && (tmp->flags & STR) == 0)
+ warning("expression in `%s' redirection only has numeric value",
+ what);
+ tmp = force_string(tmp);
+ str = tmp->stptr;
+
+ if (str == NULL || *str == '\0')
+ fatal("expression for `%s' redirection has null string value",
+ what);
+
+ if (do_lint
+ && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen)))
+ warning("filename `%s' for `%s' redirection may be result of logical expression", str, what);
+ /*
+ * Look for an existing entry with the same name and either the same
+ * flags (ignoring RED_NOBUF and RED_EOF) or, for file output, any
+ * entry that is also file output.
+ */
+ for (rp = red_head; rp != NULL; rp = rp->next)
+ if (strlen(rp->value) == tmp->stlen
+ && STREQN(rp->value, str, tmp->stlen)
+ && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag
+ || (outflag != 0
+ && (rp->flag & (RED_FILE|RED_WRITE)) == outflag)))
+ break;
+ if (rp == NULL) {
+ emalloc(rp, struct redirect *, sizeof(struct redirect),
+ "redirect");
+ emalloc(str, char *, tmp->stlen+1, "redirect");
+ memcpy(str, tmp->stptr, tmp->stlen);
+ str[tmp->stlen] = '\0';
+ rp->value = str;
+ rp->flag = tflag;
+ rp->fp = NULL;
+ rp->iop = NULL;
+ rp->pid = 0; /* unlikely that we're worried about init */
+ rp->status = 0;
+ /* maintain list in most-recently-used first order */
+ if (red_head != NULL)
+ red_head->prev = rp;
+ rp->prev = NULL;
+ rp->next = red_head;
+ red_head = rp;
+ } else
+ str = rp->value; /* get \0 terminated string */
+ /* keep trying until the entry is open; close_one() frees a descriptor */
+ while (rp->fp == NULL && rp->iop == NULL) {
+ if (rp->flag & RED_EOF)
+ /*
+ * encountered EOF on file or pipe -- must be cleared
+ * by explicit close() before reading more
+ */
+ return rp;
+ mode = NULL;
+ errno = 0;
+ switch (tree->type) {
+ case Node_redirect_output:
+ mode = "w";
+ /* RED_USED is set by close_one(): reopen in append mode */
+ if ((rp->flag & RED_USED) != 0)
+ mode = "a";
+ break;
+ case Node_redirect_append:
+ mode = "a";
+ break;
+ case Node_redirect_pipe:
+ /* synchronize output before new pipe */
+ (void) flush_io();
+
+ if ((rp->fp = popen(str, "w")) == NULL)
+ fatal("can't open pipe (\"%s\") for output (%s)",
+ str, strerror(errno));
+ rp->flag |= RED_NOBUF;
+ break;
+ case Node_redirect_pipein:
+ direction = "from";
+ if (gawk_popen(str, rp) == NULL)
+ fatal("can't open pipe (\"%s\") for input (%s)",
+ str, strerror(errno));
+ break;
+ case Node_redirect_input:
+ direction = "from";
+ rp->iop = iop_open(str, "r", NULL);
+ break;
+ default:
+ cant_happen();
+ }
+ /* mode is only set for file output: open through devopen() */
+ if (mode != NULL) {
+ errno = 0;
+ fd = devopen(str, mode);
+ if (fd > INVALID_HANDLE) {
+ if (fd == fileno(stdin))
+ rp->fp = stdin;
+ else if (fd == fileno(stdout))
+ rp->fp = stdout;
+ else if (fd == fileno(stderr))
+ rp->fp = stderr;
+ else {
+ rp->fp = fdopen(fd, (char *) mode);
+ /* don't leak file descriptors */
+ if (rp->fp == NULL)
+ close(fd);
+ }
+ if (rp->fp != NULL && isatty(fd))
+ rp->flag |= RED_NOBUF;
+ }
+ }
+ if (rp->fp == NULL && rp->iop == NULL) {
+ /* too many files open -- close one and try again */
+ if (errno == EMFILE || errno == ENFILE)
+ close_one();
+#ifdef HAVE_MMAP
+ /* this works for solaris 2.5, not sunos */
+ else if (errno == 0) /* HACK! */
+ close_one();
+#endif
+ else {
+ /*
+ * Some other reason for failure.
+ *
+ * On redirection of input from a file,
+ * just return an error, so e.g. getline
+ * can return -1. For output to file,
+ * complain. The shell will complain on
+ * a bad command to a pipe.
+ */
+ if (errflg != NULL)
+ *errflg = errno;
+ if (tree->type == Node_redirect_output
+ || tree->type == Node_redirect_append)
+ fatal("can't redirect %s `%s' (%s)",
+ direction, str, strerror(errno));
+ else {
+ free_temp(tmp);
+ return NULL;
+ }
+ }
+ }
+ }
+ free_temp(tmp);
+ return rp;
+}
+
+/*
+ * getredirect --- look up a file or pipe by name
+ *
+ * Returns the first entry on the redirection list whose name is exactly
+ * the len bytes at str, or NULL when there is none.
+ */
+
+struct redirect *
+getredirect(str, len)
+char *str;
+int len;
+{
+	struct redirect *cur = red_head;
+
+	while (cur != NULL) {
+		if (strlen(cur->value) == len && STREQN(cur->value, str, len))
+			return cur;
+		cur = cur->next;
+	}
+	return NULL;
+}
+
+/*
+ * close_one --- temporarily close an open file to re-use the fd
+ *
+ * Picks the least recently used open output file (the one closest to
+ * the tail of the list), marks it RED_USED and fclose()s it.  Having
+ * nothing to close is fatal.
+ */
+
+static void
+close_one()
+{
+	register struct redirect *rp;
+	register struct redirect *tail = NULL;
+
+	/* the list is kept most-recently-used first, so find its tail */
+	for (rp = red_head; rp != NULL; rp = rp->next)
+		tail = rp;
+
+	/* walk backwards until an open RED_FILE entry turns up */
+	for (rp = tail; rp != NULL; rp = rp->prev) {
+		if (rp->fp == NULL || (rp->flag & RED_FILE) == 0)
+			continue;
+
+		rp->flag |= RED_USED;
+		errno = 0;
+		if (fclose(rp->fp) != 0)
+			warning("close of \"%s\" failed (%s).",
+				rp->value, strerror(errno));
+		rp->fp = NULL;
+		return;
+	}
+
+	/* surely this is the only reason ??? */
+	fatal("too many pipes or input files open");
+}
+
+/*
+ * do_close --- completely close an open file or pipe
+ *
+ * Implements close(name).  Naming the current FILENAME closes the main
+ * input file.  Otherwise the matching redirection is closed and its
+ * close status returned; an unknown name yields 0 (with a lint warning).
+ */
+
+NODE *
+do_close(tree)
+NODE *tree;
+{
+	NODE *name;
+	register struct redirect *rp;
+	int status;
+
+	name = force_string(tree_eval(tree->subnode));
+
+	/* icky special case: close(FILENAME) called. */
+	if (tree->subnode == FILENAME_node
+	    || (name->stlen == FILENAME_node->var_value->stlen
+		&& STREQN(name->stptr, FILENAME_node->var_value->stptr, name->stlen))) {
+		(void) nextfile(TRUE);
+		free_temp(name);
+		return tmp_number((AWKNUM) 0.0);
+	}
+
+	/* search the redirection list for an entry with this exact name */
+	rp = red_head;
+	while (rp != NULL
+	       && (strlen(rp->value) != name->stlen
+		   || ! (STREQN(rp->value, name->stptr, name->stlen))))
+		rp = rp->next;
+
+	if (rp == NULL) {	/* no match */
+		if (do_lint)
+			warning("close: `%.*s' is not an open file or pipe",
+				name->stlen, name->stptr);
+		free_temp(name);
+		return tmp_number((AWKNUM) 0.0);
+	}
+
+	free_temp(name);
+	fflush(stdout);	/* synchronize regular output */
+	status = close_redir(rp, FALSE);
+	return tmp_number((AWKNUM) status);
+}
+
+/*
+ * close_redir --- close an open file or pipe
+ *
+ * Closes whatever rp has open, unlinks rp from the redirection list and
+ * frees it, then returns the status of the close.  A NULL rp and
+ * entries for stdout or stderr are left alone and yield 0.  When
+ * exitwarn is true a "no explicit close" warning is issued.  On a
+ * nonzero status ERRNO is set (unless do_traditional) and, under
+ * do_lint, a warning is printed.
+ */
+
+static int
+close_redir(rp, exitwarn)
+register struct redirect *rp;
+int exitwarn;
+{
+ int status = 0;
+ char *what;
+
+ if (rp == NULL)
+ return 0;
+ if (rp->fp == stdout || rp->fp == stderr)
+ return 0;
+ errno = 0;
+ /* output pipes use pclose(), other stdio streams fclose(), input uses IOBUFs */
+ if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE))
+ status = pclose(rp->fp);
+ else if (rp->fp != NULL)
+ status = fclose(rp->fp);
+ else if (rp->iop != NULL) {
+ if ((rp->flag & RED_PIPE) != 0)
+ status = gawk_pclose(rp);
+ else {
+ status = iop_close(rp->iop);
+ rp->iop = NULL;
+ }
+ }
+
+ what = ((rp->flag & RED_PIPE) != 0) ? "pipe" : "file";
+
+ if (exitwarn)
+ warning("no explicit close of %s `%s' provided",
+ what, rp->value);
+
+ /* SVR4 awk checks and warns about status of close */
+ if (status != 0) {
+ char *s = strerror(errno);
+
+ /*
+ * Too many people have complained about this.
+ * As of 2.15.6, it is now under lint control.
+ */
+ if (do_lint)
+ warning("failure status (%d) on %s close of \"%s\" (%s)",
+ status, what, rp->value, s);
+
+ if (! do_traditional) {
+ /* set ERRNO too so that program can get at it */
+ unref(ERRNO_node->var_value);
+ ERRNO_node->var_value = make_string(s, strlen(s));
+ }
+ }
+ /* unlink rp from the doubly linked list, fixing red_head if it was first */
+ if (rp->next != NULL)
+ rp->next->prev = rp->prev;
+ if (rp->prev != NULL)
+ rp->prev->next = rp->next;
+ else
+ red_head = rp->next;
+ free(rp->value);
+ free((char *) rp);
+ return status;
+}
+
+/*
+ * flush_io --- flush all open output files
+ *
+ * Flushes stdout, stderr and every open output redirection, warning
+ * about each failure.  Returns the number of streams that failed.
+ */
+
+int
+flush_io()
+{
+	register struct redirect *rp;
+	int failures = 0;
+
+	errno = 0;
+	if (fflush(stdout) != 0) {
+		warning("error writing standard output (%s)", strerror(errno));
+		failures++;
+	}
+	if (fflush(stderr) != 0) {
+		warning("error writing standard error (%s)", strerror(errno));
+		failures++;
+	}
+
+	/* flush both files and pipes, what the heck */
+	for (rp = red_head; rp != NULL; rp = rp->next) {
+		const char *kind;
+
+		if ((rp->flag & RED_WRITE) == 0 || rp->fp == NULL)
+			continue;
+		if (fflush(rp->fp) == 0)
+			continue;
+
+		kind = (rp->flag & RED_PIPE) ? "pipe" : "file";
+		warning("%s flush of \"%s\" failed (%s).",
+			kind, rp->value, strerror(errno));
+		failures++;
+	}
+	return failures;
+}
+
+/*
+ * close_io --- close all open files, called when exiting
+ *
+ * Closes every redirection (warning about the missing explicit close
+ * when do_lint is set), then flushes stdout and stderr.  Returns the
+ * number of closes and flushes that reported a problem.
+ */
+
+int
+close_io()
+{
+	register struct redirect *rp = red_head;
+	int failures = 0;
+
+	errno = 0;
+	while (rp != NULL) {
+		/* fetch the successor first: close_redir() may free rp */
+		register struct redirect *following = rp->next;
+
+		/*
+		 * close_redir() will print a message if needed
+		 * if do_lint, warn about lack of explicit close
+		 */
+		if (close_redir(rp, do_lint) != 0)
+			failures++;
+		rp = following;
+	}
+
+	/*
+	 * Some of the non-Unix os's have problems doing an fclose
+	 * on stdout and stderr.  Since we don't really need to close
+	 * them, we just flush them, and do that across the board.
+	 */
+	if (fflush(stdout) != 0) {
+		warning("error writing standard output (%s)", strerror(errno));
+		failures++;
+	}
+	if (fflush(stderr) != 0) {
+		warning("error writing standard error (%s)", strerror(errno));
+		failures++;
+	}
+	return failures;
+}
+
+/*
+ * str2mode --- convert a string mode to an integer mode
+ *
+ * Only the first character of mode is examined: 'r', 'w' and 'a' map
+ * to the matching open() flags.  Anything else is a "can't happen".
+ */
+
+static int
+str2mode(mode)
+const char *mode;
+{
+	int first = mode[0];
+
+	if (first == 'r')
+		return O_RDONLY;
+	if (first == 'w')
+		return O_WRONLY|O_CREAT|O_TRUNC;
+	if (first == 'a')
+		return O_WRONLY|O_APPEND|O_CREAT;
+
+	cant_happen();
+	return 0;	/* lint */
+}
+
+/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */
+
+/*
+ * This separate version is still needed for output, since file and pipe
+ * output is done with stdio. iop_open() handles input with IOBUFs of
+ * more "special" files.  Those files are not handled here since it makes
+ * no sense to use them for output.
+ *
+ * Returns an open file descriptor, or INVALID_HANDLE if the file could
+ * not be opened.  A descriptor that turns out to be a directory is fatal.
+ */
+
+int
+devopen(name, mode)
+const char *name, *mode;
+{
+	int openfd;
+	const char *cp;
+	char *ptr;
+	int flag = 0;
+	struct stat buf;
+	extern double strtod();
+
+	flag = str2mode(mode);
+
+	if (STREQ(name, "-"))
+		openfd = fileno(stdin);
+	else
+		openfd = INVALID_HANDLE;
+
+	if (do_traditional)
+		goto strictopen;
+
+	/*
+	 * NOTE(review): this assignment discards the result of the "-"
+	 * test above, so "-" only maps to stdin under do_traditional.
+	 */
+	if ((openfd = os_devopen(name, flag)) >= 0)
+		return openfd;
+
+	/* emulate the /dev names only when no such file really exists */
+	if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
+		cp = name + 5;
+
+		if (STREQ(cp, "stdin") && (flag & O_ACCMODE) == O_RDONLY)
+			openfd = fileno(stdin);
+		else if (STREQ(cp, "stdout") && (flag & O_ACCMODE) == O_WRONLY)
+			openfd = fileno(stdout);
+		else if (STREQ(cp, "stderr") && (flag & O_ACCMODE) == O_WRONLY)
+			openfd = fileno(stderr);
+		else if (STREQN(cp, "fd/", 3)) {
+			cp += 3;
+			openfd = (int) strtod(cp, &ptr);
+			if (openfd <= INVALID_HANDLE || ptr == cp)
+				openfd = INVALID_HANDLE;
+		}
+	}
+
+strictopen:
+	if (openfd == INVALID_HANDLE)
+		openfd = open(name, flag, 0666);
+	/*
+	 * fstat() returns 0 on success and -1 on failure, so the result
+	 * must be compared against 0 for the directory check to run.
+	 */
+	if (openfd != INVALID_HANDLE && fstat(openfd, &buf) == 0)
+		if (S_ISDIR(buf.st_mode))
+			fatal("file `%s' is a directory", name);
+	return openfd;
+}
+
+
+/*
+ * spec_setup --- setup an IOBUF for a special internal file
+ *
+ * With allocate true, a fresh buffer of len+2 bytes is attached to iop.
+ * Otherwise the existing buffer is re-used: its length is recomputed
+ * and the trailing newline put back.  Either way the IOBUF is reset to
+ * an unread internal buffer of len bytes with no file descriptor.
+ */
+
+static void
+spec_setup(iop, len, allocate)
+IOBUF *iop;
+int len;
+int allocate;
+{
+	char *cp;
+
+	if (! allocate) {
+		len = strlen(iop->buf);
+		iop->buf[len] = '\n';	/* get_a_record clobbered it */
+		len++;
+		iop->buf[len] = '\0';	/* just in case */
+	} else {
+		emalloc(cp, char *, len+2, "spec_setup");
+		iop->buf = cp;
+	}
+
+	iop->fd = -1;
+	iop->flag = IOP_IS_INTERNAL;
+	iop->getrec = get_a_record;
+
+	iop->size = len;
+	iop->secsiz = 0;
+	iop->cnt = 0;
+	iop->off = iop->buf;
+	iop->end = iop->buf + len;
+}
+
+/*
+ * specfdopen --- open an fd special file
+ *
+ * Opens name through devopen() and fills *iop from a freshly allocated
+ * IOBUF for that descriptor, marking it IOP_NO_FREE.  Returns 0 on
+ * success and INVALID_HANDLE on failure.
+ */
+
+static int
+specfdopen(iop, name, mode)
+IOBUF *iop;
+const char *name, *mode;
+{
+	IOBUF *fresh;
+	int fd = devopen(name, mode);
+
+	if (fd == INVALID_HANDLE)
+		return INVALID_HANDLE;
+
+	fresh = iop_alloc(fd, name, NULL);
+	if (fresh != NULL) {
+		/* keep the contents in the caller's IOBUF, drop the shell */
+		*iop = *fresh;
+		iop->flag |= IOP_NO_FREE;
+		free(fresh);
+		return 0;
+	}
+
+	/* don't leak fd's */
+	close(fd);
+	return INVALID_HANDLE;
+}
+
+#ifdef GETPGRP_VOID
+#define getpgrp_arg() /* nothing */
+#else
+#define getpgrp_arg() getpid()
+#endif
+
+/*
+ * pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid
+ *
+ * The three names differ in the character at index 6 ('i', 'p' and 'g'
+ * respectively), which selects the id to report.  The id is stored in
+ * iop's internal buffer as one newline-terminated record.  Returns 0.
+ */
+
+static int
+pidopen(iop, name, mode)
+IOBUF *iop;
+const char *name, *mode;
+{
+	char tbuf[BUFSIZ];
+	int len;
+
+	switch (name[6]) {
+	case 'g':
+		sprintf(tbuf, "%d\n", getpgrp(getpgrp_arg()));
+		break;
+	case 'i':
+		sprintf(tbuf, "%d\n", getpid());
+		break;
+	default:
+		sprintf(tbuf, "%d\n", getppid());
+		break;
+	}
+
+	len = strlen(tbuf);
+	spec_setup(iop, len, TRUE);
+	strcpy(iop->buf, tbuf);
+	return 0;
+}
+
+/* useropen --- "open" /dev/user */
+
+/*
+ * /dev/user creates a record as follows:
+ *	$1 = getuid()
+ *	$2 = geteuid()
+ *	$3 = getgid()
+ *	$4 = getegid()
+ * If multiple groups are supported, then $5 through $NF are the
+ * supplementary group set.
+ *
+ * The record is built in a scratch buffer sized from the number of ids
+ * to print (a fixed BUFSIZ buffer overflows when NGROUPS_MAX is large),
+ * then copied into iop's internal buffer.  Returns 0; failure to get
+ * the group set is fatal.
+ */
+
+static int
+useropen(iop, name, mode)
+IOBUF *iop;
+const char *name, *mode;
+{
+	char *tbuf, *cp;
+	int i;
+	size_t nids = 4;	/* uid, euid, gid, egid */
+	/* room for one "%d": sign, at most 3 digits per byte, a separator */
+	size_t per_id = 3 * sizeof(int) + 2;
+#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
+	GETGROUPS_T groupset[NGROUPS_MAX];
+	int ngroups;
+
+	ngroups = getgroups(NGROUPS_MAX, groupset);
+	if (ngroups == -1)
+		fatal("could not find groups: %s", strerror(errno));
+	nids += ngroups;
+#endif
+
+	/* + 2 for the trailing newline and the terminating NUL */
+	emalloc(tbuf, char *, nids * per_id + 2, "useropen");
+
+	sprintf(tbuf, "%d %d %d %d", getuid(), geteuid(), getgid(), getegid());
+
+	cp = tbuf + strlen(tbuf);
+#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
+	for (i = 0; i < ngroups; i++) {
+		*cp++ = ' ';
+		sprintf(cp, "%d", (int) groupset[i]);
+		cp += strlen(cp);
+	}
+#endif
+	*cp++ = '\n';
+	*cp++ = '\0';
+
+	i = strlen(tbuf);
+	spec_setup(iop, i, TRUE);
+	strcpy(iop->buf, tbuf);
+	free(tbuf);
+	return 0;
+}
+
+/*
+ * iop_open --- handle special and regular files for input
+ *
+ * "-" means standard input.  Unless do_traditional is set, a /dev/...
+ * name that does not exist in the file system is looked up in the table
+ * of internally simulated files; a table entry whose buffer is already
+ * set up is re-used.  Everything else goes through open().  Returns the
+ * IOBUF for the file, or NULL if a simulated file could not be opened.
+ * Opening a directory is fatal.
+ */
+
+static IOBUF *
+iop_open(name, mode, iop)
+const char *name, *mode;
+IOBUF *iop;
+{
+	int openfd = INVALID_HANDLE;
+	int flag = 0;
+	struct stat buf;
+	static struct internal {
+		const char *name;	/* name, or name prefix, to match */
+		int compare;		/* number of leading bytes compared */
+		int (*fp) P((IOBUF *, const char *, const char *));
+		IOBUF iob;		/* per-entry IOBUF, re-used across opens */
+	} table[] = {
+		{ "/dev/fd/",		8,	specfdopen },
+		{ "/dev/stdin",		10,	specfdopen },
+		{ "/dev/stdout",	11,	specfdopen },
+		{ "/dev/stderr",	11,	specfdopen },
+		{ "/dev/pid",		8,	pidopen },
+		{ "/dev/ppid",		9,	pidopen },
+		{ "/dev/pgrpid",	11,	pidopen },
+		{ "/dev/user",		9,	useropen },
+	};
+	int devcount = sizeof(table) / sizeof(table[0]);
+
+	flag = str2mode(mode);
+
+	/*
+	 * FIXME: remove the stat call, and always process these files
+	 * internally.
+	 */
+	if (STREQ(name, "-"))
+		openfd = fileno(stdin);
+	else if (do_traditional)
+		goto strictopen;
+	else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
+		int i;
+
+		for (i = 0; i < devcount; i++) {
+			if (STREQN(name, table[i].name, table[i].compare)) {
+				iop = & table[i].iob;
+
+				if (iop->buf != NULL) {
+					spec_setup(iop, 0, FALSE);
+					return iop;
+				} else if ((*table[i].fp)(iop, name, mode) == 0)
+					return iop;
+				else {
+					warning("could not open %s, mode `%s'",
+						name, mode);
+					return NULL;
+				}
+			}
+		}
+	}
+
+strictopen:
+	if (openfd == INVALID_HANDLE)
+		openfd = open(name, flag, 0666);
+	/*
+	 * fstat() returns 0 on success and -1 on failure, so the result
+	 * must be compared against 0 for the directory check to run.
+	 */
+	if (openfd != INVALID_HANDLE && fstat(openfd, &buf) == 0)
+		if ((buf.st_mode & S_IFMT) == S_IFDIR)
+			fatal("file `%s' is a directory", name);
+	return iop_alloc(openfd, name, iop);
+}
+
+#ifndef PIPES_SIMULATED /* real pipes */
+
+/*
+ * wait_any --- wait for a child process, close associated pipe
+ *
+ * Calls wait() repeatedly.  The loop ends when the child whose pid is
+ * `interesting' (if nonzero) has been reaped, or when wait() fails with
+ * ECHILD.  Any other child that belongs to a redirection has its exit
+ * status saved in that entry and its pid set to -1.  SIGHUP, SIGINT and
+ * SIGQUIT are ignored for the duration and restored afterwards.
+ * Returns the status word last stored by wait().
+ */
+
+static int
+wait_any(interesting)
+int interesting; /* pid of interest, if any */
+{
+ RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)();
+ int pid;
+ int status = 0;
+ struct redirect *redp;
+ extern int errno;
+
+ /* remember the old handlers so they can be put back below */
+ hstat = signal(SIGHUP, SIG_IGN);
+ istat = signal(SIGINT, SIG_IGN);
+ qstat = signal(SIGQUIT, SIG_IGN);
+ for (;;) {
+#ifdef HAVE_SYS_WAIT_H /* Posix compatible sys/wait.h */
+ pid = wait(&status);
+#else
+ pid = wait((union wait *)&status);
+#endif /* NeXT */
+ if (interesting && pid == interesting) {
+ break;
+ } else if (pid != -1) {
+ /* some other child: stash its status for a later gawk_pclose() */
+ for (redp = red_head; redp != NULL; redp = redp->next)
+ if (pid == redp->pid) {
+ redp->pid = -1;
+ redp->status = status;
+ break;
+ }
+ }
+ if (pid == -1 && errno == ECHILD)
+ break;
+ }
+ signal(SIGHUP, hstat);
+ signal(SIGINT, istat);
+ signal(SIGQUIT, qstat);
+ return(status);
+}
+
+/*
+ * gawk_popen --- open an IOBUF on a child process
+ *
+ * Creates a pipe and forks; the child makes the write end its standard
+ * output and runs cmd with "/bin/sh -c".  The parent records the child
+ * pid in rp->pid and wraps the read end in an IOBUF stored in rp->iop.
+ * Returns rp->iop, which is NULL if the IOBUF could not be allocated.
+ * Failures of pipe(), fork(), dup() and close() are fatal.
+ */
+
+static IOBUF *
+gawk_popen(cmd, rp)
+char *cmd;
+struct redirect *rp;
+{
+ int p[2];
+ register int pid;
+
+ /*
+ * used to wait for any children to synchronize input and output,
+ * but this could cause gawk to hang when it is started in a pipeline
+ * and thus has a child process feeding it input (shell dependent)
+ */
+ /*(void) wait_any(0);*/ /* wait for outstanding processes */
+
+ if (pipe(p) < 0)
+ fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno));
+ if ((pid = fork()) == 0) {
+ /* child: after close(1), dup() must hand back descriptor 1 */
+ if (close(1) == -1)
+ fatal("close of stdout in child failed (%s)",
+ strerror(errno));
+ if (dup(p[1]) != 1)
+ fatal("dup of pipe failed (%s)", strerror(errno));
+ if (close(p[0]) == -1 || close(p[1]) == -1)
+ fatal("close of pipe failed (%s)", strerror(errno));
+ execl("/bin/sh", "sh", "-c", cmd, NULL);
+ /* only reached if execl() failed */
+ _exit(127);
+ }
+ if (pid == -1)
+ fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno));
+ rp->pid = pid;
+ /* parent: only the read end is needed */
+ if (close(p[1]) == -1)
+ fatal("close of pipe failed (%s)", strerror(errno));
+ rp->iop = iop_alloc(p[0], cmd, NULL);
+ if (rp->iop == NULL)
+ (void) close(p[0]);
+ return (rp->iop);
+}
+
+/*
+ * gawk_pclose --- close an open child pipe
+ *
+ * Closes the IOBUF, then reaps the child unless wait_any() already did
+ * so (rp->pid == -1).  Returns bits 8-15 of the child's wait status.
+ */
+
+static int
+gawk_pclose(rp)
+struct redirect *rp;
+{
+	(void) iop_close(rp->iop);
+	rp->iop = NULL;
+
+	/* a pid of -1 means the status has already been stored */
+	if (rp->pid != -1) {
+		rp->status = wait_any(rp->pid);
+		rp->pid = -1;
+	}
+	return (rp->status >> 8) & 0xFF;
+}
+
+#else /* PIPES_SIMULATED */
+
+/*
+ * use temporary file rather than pipe
+ * except if popen() provides real pipes too
+ */
+
+#if defined(VMS) || defined(OS2) || defined (MSDOS)
+
+/*
+ * gawk_popen --- open an IOBUF on a child process
+ *
+ * Runs cmd with popen() and wraps the stream's descriptor in an IOBUF.
+ * The stream is kept in rp->ifp so gawk_pclose() can pclose() it.
+ * Returns rp->iop, or NULL if popen() or the IOBUF allocation failed.
+ */
+
+static IOBUF *
+gawk_popen(cmd, rp)
+char *cmd;
+struct redirect *rp;
+{
+	FILE *current;
+
+	if ((current = popen(cmd, "r")) == NULL)
+		return NULL;
+	rp->iop = iop_alloc(fileno(current), cmd, NULL);
+	if (rp->iop == NULL) {
+		/* a popen() stream has to be released with pclose() */
+		(void) pclose(current);
+		current = NULL;
+	}
+	rp->ifp = current;
+	return (rp->iop);
+}
+
+/*
+ * gawk_pclose --- close an open child pipe
+ *
+ * Closes both the IOBUF and the popen() stream behind it.  Returns the
+ * iop_close() result if it is negative, otherwise the pclose() result.
+ */
+
+static int
+gawk_pclose(rp)
+struct redirect *rp;
+{
+	int close_status, cmd_status;
+	int fd = rp->iop->fd;
+
+	/* kludge to allow close() + pclose() */
+	rp->iop->fd = dup(fd);
+	close_status = iop_close(rp->iop);
+	rp->iop = NULL;
+
+	cmd_status = pclose(rp->ifp);
+	rp->ifp = NULL;
+
+	if (close_status < 0)
+		return close_status;
+	return cmd_status;
+}
+#else /* not (VMS || OS2 || MSDOS) */
+
+/* Bookkeeping for simulated pipes; gawk_popen() indexes it by file descriptor. */
+static struct pipeinfo {
+ char *command; /* strdup()ed copy of the command that was run */
+ char *name; /* temporary file holding the command's output */
+} pipes[_NFILE];
+
+/*
+ * gawk_popen --- open an IOBUF on a child process
+ *
+ * Simulates a pipe: runs "cmd > tempfile" with system(), then opens the
+ * temporary file for reading and wraps it in an IOBUF.  The temporary
+ * file name and the command are recorded in pipes[] under the new file
+ * descriptor for gawk_pclose().  Returns rp->iop, or NULL on failure.
+ */
+
+static IOBUF *
+gawk_popen(cmd, rp)
+char *cmd;
+struct redirect *rp;
+{
+	extern char *strdup P((const char *));
+	int current;
+	char *name;
+	char *cmdbuf;
+
+	/* get a name to use */
+	if ((name = tempnam(".", "pip")) == NULL)
+		return NULL;
+
+	/* sized to fit: cmd, " > ", name and the terminating NUL */
+	emalloc(cmdbuf, char *, strlen(cmd) + strlen(name) + 4, "gawk_popen");
+	sprintf(cmdbuf, "%s > %s", cmd, name);
+	system(cmdbuf);
+	free(cmdbuf);
+
+	if ((current = open(name, O_RDONLY)) == INVALID_HANDLE) {
+		free(name);	/* tempnam() storage is ours to release */
+		return NULL;
+	}
+	if (current >= (int) (sizeof(pipes) / sizeof(pipes[0]))) {
+		/* this descriptor has no slot in pipes[] */
+		(void) close(current);
+		(void) unlink(name);
+		free(name);
+		return NULL;
+	}
+	pipes[current].name = name;
+	pipes[current].command = strdup(cmd);
+	rp->iop = iop_alloc(current, name, NULL);
+	if (rp->iop == NULL)
+		(void) close(current);
+	return (rp->iop);
+}
+
+/*
+ * gawk_pclose --- close an open child pipe
+ *
+ * Closes the IOBUF, then removes the temporary file recorded in pipes[]
+ * for its descriptor and frees the table entry.  Returns the iop_close()
+ * result, or -1 when no temporary file is recorded for the descriptor.
+ */
+
+static int
+gawk_pclose(rp)
+struct redirect *rp;
+{
+	int cur = rp->iop->fd;
+	int rval;
+
+	rval = iop_close(rp->iop);
+	rp->iop = NULL;
+
+	/*
+	 * The IOBUF does not always hold a usable descriptor: iop_alloc()
+	 * stores INVALID_HANDLE there after memory-mapping a file.  Never
+	 * index the table with such a value.
+	 */
+	if (cur < 0 || cur >= (int) (sizeof(pipes) / sizeof(pipes[0])))
+		return -1;
+
+	/* check for an open file */
+	if (pipes[cur].name == NULL)
+		return -1;
+	(void) unlink(pipes[cur].name);
+	free(pipes[cur].name);
+	pipes[cur].name = NULL;
+	free(pipes[cur].command);
+	pipes[cur].command = NULL;	/* don't leave a dangling pointer in the table */
+	return rval;
+}
+#endif /* not (VMS || OS2 || MSDOS) */
+
+#endif /* PIPES_SIMULATED */
+
+/*
+ * do_getline --- read in a line, into var and with redirection, as needed
+ *
+ * tree->rnode is the redirection, or NULL to read from the main input.
+ * tree->lnode is the variable to assign, or NULL to set the record itself.
+ * Returns a temporary number node: 1 when a record was read, 0 at end of
+ * input, -1 on a failed redirection or read error.  On errors, unless
+ * do_traditional is set, ERRNO is set to the strerror() text.
+ */
+
+NODE *
+do_getline(tree)
+NODE *tree;
+{
+	struct redirect *rp = NULL;
+	IOBUF *iop;
+	int cnt = EOF;
+	char *s = NULL;
+	int errcode;
+
+	while (cnt == EOF) {
+		if (tree->rnode == NULL) {	 /* no redirection */
+			iop = nextfile(FALSE);
+			if (iop == NULL)		/* end of input */
+				return tmp_number((AWKNUM) 0.0);
+		} else {
+			int redir_error = 0;
+
+			rp = redirect(tree->rnode, &redir_error);
+			if (rp == NULL) {	/* failed redirect */
+				/*
+				 * Fail even when no error code came back;
+				 * falling through would dereference rp.
+				 * ERRNO is only updated when there is a
+				 * code to report.
+				 */
+				if (redir_error != 0 && ! do_traditional) {
+					s = strerror(redir_error);
+
+					unref(ERRNO_node->var_value);
+					ERRNO_node->var_value =
+						make_string(s, strlen(s));
+				}
+				return tmp_number((AWKNUM) -1.0);
+			}
+			iop = rp->iop;
+			if (iop == NULL)		/* end of input */
+				return tmp_number((AWKNUM) 0.0);
+		}
+		errcode = 0;
+		cnt = (*(iop->getrec))(&s, iop, RS->stptr[0], RS_regexp, &errcode);
+		if (errcode != 0) {
+			if (! do_traditional) {
+				s = strerror(errcode);
+
+				unref(ERRNO_node->var_value);
+				ERRNO_node->var_value = make_string(s, strlen(s));
+			}
+			return tmp_number((AWKNUM) -1.0);
+		}
+		if (cnt == EOF) {
+			if (rp != NULL) {
+				/*
+				 * Don't do iop_close() here if we are
+				 * reading from a pipe; otherwise
+				 * gawk_pclose will not be called.
+				 */
+				if ((rp->flag & RED_PIPE) == 0) {
+					(void) iop_close(iop);
+					rp->iop = NULL;
+				}
+				rp->flag |= RED_EOF;	/* sticky EOF */
+				return tmp_number((AWKNUM) 0.0);
+			} else
+				continue;	/* try another file */
+		}
+		/* only records from the main input count towards NR and FNR */
+		if (rp == NULL) {
+			NR++;
+			FNR++;
+		}
+		if (tree->lnode == NULL)	/* no optional var. */
+			set_record(s, cnt, TRUE);
+		else {			/* assignment to variable */
+			Func_ptr after_assign = NULL;
+			NODE **lhs;
+
+			lhs = get_lhs(tree->lnode, &after_assign);
+			unref(*lhs);
+			*lhs = make_string(s, cnt);
+			(*lhs)->flags |= MAYBE_NUM;
+			/* we may have to regenerate $0 here! */
+			if (after_assign != NULL)
+				(*after_assign)();
+		}
+	}
+	return tmp_number((AWKNUM) 1.0);
+}
+
+/*
+ * pathopen --- pathopen with default file extension handling
+ *
+ * Opens `file' via do_pathopen().  When DEFAULT_FILETYPE is defined and
+ * do_traditional is off, a failed open is retried with DEFAULT_FILETYPE
+ * appended; if that fails too, errno from the first attempt is restored.
+ * Returns the descriptor from do_pathopen().
+ */
+
+int
+pathopen(file)
+const char *file;
+{
+	int fd;
+#ifdef DEFAULT_FILETYPE
+	char *with_suffix;
+	int saved_errno;
+#ifdef VMS
+	int saved_vms_errno;
+#endif
+#endif /*DEFAULT_FILETYPE*/
+
+	fd = do_pathopen(file);
+
+#ifdef DEFAULT_FILETYPE
+	/* done if the plain name opened, or if no retry is wanted */
+	if (do_traditional || fd > INVALID_HANDLE)
+		return fd;
+
+	saved_errno = errno;
+#ifdef VMS
+	saved_vms_errno = vaxc$errno;
+#endif
+
+	/* append the default extension and try again */
+	emalloc(with_suffix, char *, strlen(file) +
+		sizeof(DEFAULT_FILETYPE) + 1, "pathopen");
+	sprintf(with_suffix, "%s%s", file, DEFAULT_FILETYPE);
+	fd = do_pathopen(with_suffix);
+	free(with_suffix);
+
+	if (fd <= INVALID_HANDLE) {
+		/* report the error for the name as originally given */
+		errno = saved_errno;
+#ifdef VMS
+		vaxc$errno = saved_vms_errno;
+#endif
+	}
+#endif /*DEFAULT_FILETYPE*/
+
+	return fd;
+}
+
+/*
+ * do_pathopen --- search $AWKPATH for source file
+ *
+ * "-" yields descriptor 0.  In traditional mode, or when `file' already
+ * looks like a path name, it is opened directly with no search.
+ * Otherwise each element of AWKPATH (or of defpath when AWKPATH is unset
+ * or empty) is tried in turn; an empty element means the current
+ * directory.  If nothing in the path works, the file is finally tried in
+ * the current directory.  Returns whatever devopen() returns.
+ *
+ * Elements too long to fit in the local buffer together with the file
+ * name are skipped rather than copied.
+ */
+
+static int
+do_pathopen(file)
+const char *file;
+{
+	static const char *savepath = NULL;
+	static int first = TRUE;
+	const char *awkpath;
+	const char *elem;
+	char *cp, trypath[BUFSIZ];
+	size_t elen, flen;
+	int fd;
+
+	if (STREQ(file, "-"))
+		return (0);
+
+	if (do_traditional)
+		return (devopen(file, "r"));
+
+	if (first) {
+		first = FALSE;
+		if ((awkpath = getenv("AWKPATH")) != NULL && *awkpath)
+			savepath = awkpath;	/* used for restarting */
+		else
+			savepath = defpath;
+	}
+	awkpath = savepath;
+
+	/* some kind of path name, no search */
+	if (ispath(file))
+		return (devopen(file, "r"));
+
+	flen = strlen(file);
+	do {
+		/* find the extent of the next path element */
+		for (elem = awkpath; *awkpath && *awkpath != envsep; )
+			awkpath++;
+		elen = (size_t) (awkpath - elem);
+
+		/* room needed: element, possible '/', file name, NUL */
+		if (elen + 1 + flen + 1 <= sizeof(trypath)) {
+			cp = trypath;
+			if (elen > 0) {	/* non-null element in path */
+				memcpy(cp, elem, elen);
+				cp += elen;
+				/* add directory punctuation only if needed */
+				if (! isdirpunct(*(cp-1)))
+					*cp++ = '/';
+			}
+			/* append filename */
+			strcpy(cp, file);
+			if ((fd = devopen(trypath, "r")) > INVALID_HANDLE)
+				return (fd);
+		}
+
+		/*
+		 * No luck, keep going.  Always step over the separator,
+		 * even a trailing one; leaving it in place would make the
+		 * loop retry the same empty element forever.
+		 */
+		if (*awkpath == envsep)
+			awkpath++;
+	} while (*awkpath != '\0');
+	/*
+	 * You might have one of the awk paths defined, WITHOUT the current
+	 * working directory in it.  Therefore try to open the file in the
+	 * current directory.
+	 */
+	return (devopen(file, "r"));
+}
+
+#ifdef TEST
+/* buffer size for the stand-alone test driver; its main() can override it */
+int bufsize = 8192;
+
+/* fatal --- test-driver stand-in: print the message and exit with status 1 */
+
+void
+fatal(s)
+char *s;
+{
+	(void) printf("%s\n", s);
+	exit(1);
+}
+#endif
+
+/* iop_alloc --- allocate an IOBUF structure for an open fd
+ *
+ * fd is the descriptor to read from; INVALID_HANDLE yields NULL.
+ * name is stored in the IOBUF as given (not copied) and is used in the
+ * lint warning about empty files.
+ * iop, when non-NULL, is initialized in place instead of allocating.
+ *
+ * Returns the initialized IOBUF, set up to read with get_a_record.
+ * With HAVE_MMAP, a non-empty regular file is mapped instead: fd is
+ * closed, iop->fd becomes INVALID_HANDLE and records are read with
+ * mmap_get_record.  If the mapping fails, the read-based setup stands.
+ */
+
+static IOBUF *
+iop_alloc(fd, name, iop)
+int fd;
+const char *name;
+IOBUF *iop;
+{
+ struct stat sbuf;
+
+ if (fd == INVALID_HANDLE)
+ return NULL;
+ if (iop == NULL)
+ emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");
+ iop->flag = 0;
+ if (isatty(fd))
+ iop->flag |= IOP_IS_TTY;
+ iop->size = optimal_bufsize(fd, & sbuf); /* presumably also fills in sbuf, which is read below -- confirm */
+ if (do_lint && S_ISREG(sbuf.st_mode) && sbuf.st_size == 0)
+ warning("data file `%s' is empty", name);
+ iop->secsiz = -2; /* initial marker; get_a_record turns -2 into 256 on first expansion */
+ errno = 0;
+ iop->fd = fd;
+ iop->off = iop->buf = NULL; /* no buffer yet; get_a_record allocates it on first use */
+ iop->cnt = 0;
+ iop->name = name;
+ iop->getrec = get_a_record;
+#ifdef HAVE_MMAP
+ if (S_ISREG(sbuf.st_mode) && sbuf.st_size > 0) {
+ register char *cp;
+
+ iop->buf = iop->off = mmap((caddr_t) 0, sbuf.st_size,
+ PROT_READ|PROT_WRITE, MAP_PRIVATE,
+ fd, 0L);
+ /* cast is for buggy compilers (e.g. DEC OSF/1) */
+ if (iop->buf == (caddr_t)MAP_FAILED) {
+ iop->buf = iop->off = NULL;
+ goto out; /* keep the read()-based setup made above */
+ }
+
+ iop->flag |= IOP_MMAPPED;
+ iop->size = sbuf.st_size;
+ iop->secsiz = 0;
+ iop->end = iop->buf + iop->size;
+ iop->cnt = sbuf.st_size;
+ iop->getrec = mmap_get_record;
+ (void) close(fd); /* the descriptor is not used again once the file is mapped */
+ iop->fd = INVALID_HANDLE;
+
+#if defined(HAVE_MADVISE) && defined(MADV_SEQUENTIAL)
+ madvise(iop->buf, iop->size, MADV_SEQUENTIAL);
+#endif
+ /*
+ * The following is a really gross hack.
+ * We want to ensure that we have a copy of the input
+ * data that won't go away, on the off chance that someone
+ * will truncate the data file we've just mmap'ed.
+ * So, we go through and touch each page, forcing the
+ * system to give us a private copy. A page size of 512
+ * guarantees this will work, even on the least common
+ * denominator system (like, oh say, a VAX).
+ */
+ for (cp = iop->buf; cp < iop->end; cp += 512)
+ *cp = *cp; /* writing to a MAP_PRIVATE page is what forces the private copy */
+ }
+out:
+#endif /* HAVE_MMAP */
+ return iop;
+}
+
+/* These macros used by both record reading routines
+ *
+ * Both do nothing when do_traditional is set.  Otherwise they release
+ * the current value of RT and install a new one: set_RT_to_null()
+ * installs the shared Nnull_string, set_RT(str, len) a new string made
+ * from len bytes at str.  Each expands to a single void expression, so
+ * it can be used wherever a statement is expected.
+ */
+#define set_RT_to_null() \
+ (void)(! do_traditional && (unref(RT_node->var_value), \
+ RT_node->var_value = Nnull_string))
+
+#define set_RT(str, len) \
+ (void)(! do_traditional && (unref(RT_node->var_value), \
+ RT_node->var_value = make_string(str, len)))
+
+/*
+ * get_a_record:
+ * Get the next record. Uses a "split buffer" where the latter part is
+ * the normal read buffer and the head part is an "overflow" area that is used
+ * when a record spans the end of the normal buffer, in which case the first
+ * part of the record is copied into the overflow area just before the
+ * normal buffer. Thus, the eventual full record can be returned as a
+ * contiguous area of memory with a minimum of copying. The overflow area
+ * is expanded as needed, so that records are unlimited in length.
+ * We also mark both the end of the buffer and the end of the read() with
+ * a sentinel character (the current record separator) so that the inside
+ * loop can run as a single test.
+ *
+ * Note that since we know or can compute the end of the read and the end
+ * of the buffer, the sentinel character does not get in the way of regexp
+ * based searching, since we simply search up to that character, but not
+ * including it.
+ *
+ * Arguments: out receives a pointer to the record (NULL at EOF); iop is
+ * the input buffer; grRS is the first character of RS, with 0 standing
+ * for RS = ""; RSre is the compiled regexp for RS, or NULL; errcode, when
+ * non-NULL and not in traditional mode, receives errno from a failed
+ * read(), which is otherwise a fatal error.
+ *
+ * Returns the length of the record, or EOF when there is no more input.
+ * Unless do_traditional is set, RT is updated along the way to the text
+ * that terminated the record, or to the null string.
+ */
+
+static int
+get_a_record(out, iop, grRS, RSre, errcode)
+char **out; /* pointer to pointer to data */
+IOBUF *iop; /* input IOP */
+register int grRS; /* first char in RS->stptr */
+Regexp *RSre; /* regexp for RS */
+int *errcode; /* pointer to error variable */
+{
+ register char *bp = iop->off;
+ char *bufend;
+ char *start = iop->off; /* beginning of record */
+ int rs;
+ static Regexp *RS_null_re = NULL;
+ Regexp *rsre = NULL;
+ int continuing = FALSE, continued = FALSE; /* used for re matching */
+ int onecase;
+
+ /* first time through */
+ if (RS_null_re == NULL) {
+ RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE);
+ if (RS_null_re == NULL)
+ fatal("internal error: file `%s', line %d\n",
+ __FILE__, __LINE__);
+ }
+
+ if (iop->cnt == EOF) { /* previous read hit EOF */
+ *out = NULL;
+ set_RT_to_null();
+ return EOF;
+ }
+
+ if (grRS == FALSE) /* special case: RS == "" */
+ rs = '\n';
+ else
+ rs = (char) grRS;
+
+ onecase = (IGNORECASE && isalpha(rs)); /* fold case only when RS is a letter */
+ if (onecase)
+ rs = casetable[rs];
+
+ /* set up sentinel */
+ if (iop->buf) {
+ bufend = iop->buf + iop->size + iop->secsiz;
+ *bufend = rs; /* add sentinel to buffer */
+ } else
+ bufend = NULL; /* no buffer yet: forces chunk #1 below to allocate one */
+
+ for (;;) { /* break on end of record, read error or EOF */
+/* buffer mgmt, chunk #1 */
+ /*
+ * Following code is entered on the first call of this routine
+ * for a new iop, or when we scan to the end of the buffer.
+ * In the latter case, we copy the current partial record to
+ * the space preceding the normal read buffer. If necessary,
+ * we expand this space. This is done so that we can return
+ * the record as a contiguous area of memory.
+ */
+ if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
+ char *oldbuf = NULL;
+ char *oldsplit = iop->buf + iop->secsiz;
+ long len; /* record length so far */
+
+ len = bp - start;
+ if (len > iop->secsiz) {
+ /* expand secondary buffer */
+ if (iop->secsiz == -2) /* -2 is the initial value set by iop_alloc */
+ iop->secsiz = 256;
+ while (len > iop->secsiz)
+ iop->secsiz *= 2;
+ oldbuf = iop->buf;
+ emalloc(iop->buf, char *,
+ iop->size+iop->secsiz+2, "get_a_record");
+ bufend = iop->buf + iop->size + iop->secsiz;
+ *bufend = rs;
+ }
+ if (len > 0) {
+ char *newsplit = iop->buf + iop->secsiz;
+
+ if (start < oldsplit) { /* record already begins in the overflow area: move it in two pieces */
+ memcpy(newsplit - len, start,
+ oldsplit - start);
+ memcpy(newsplit - (bp - oldsplit),
+ oldsplit, bp - oldsplit);
+ } else
+ memcpy(newsplit - len, start, len);
+ }
+ bp = iop->end = iop->off = iop->buf + iop->secsiz;
+ start = bp - len;
+ if (oldbuf != NULL) {
+ free(oldbuf);
+ oldbuf = NULL;
+ }
+ }
+/* buffer mgmt, chunk #2 */
+ /*
+ * Following code is entered whenever we have no more data to
+ * scan. In most cases this will read into the beginning of
+ * the main buffer, but in some cases (terminal, pipe etc.)
+ * we may be doing smallish reads into more advanced positions.
+ */
+ if (bp >= iop->end) {
+ if ((iop->flag & IOP_IS_INTERNAL) != 0) {
+ iop->cnt = EOF;
+ break;
+ }
+ iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
+ if (iop->cnt == -1) {
+ if (! do_traditional && errcode != NULL) {
+ *errcode = errno;
+ iop->cnt = EOF;
+ break;
+ } else
+ fatal("error reading input file `%s': %s",
+ iop->name, strerror(errno));
+ } else if (iop->cnt == 0) {
+ /*
+ * hit EOF before matching RS, so end
+ * the record and set RT to ""
+ */
+ iop->cnt = EOF;
+ /* see comments below about this test */
+ if (! continuing) {
+ set_RT_to_null();
+ break;
+ }
+ }
+ if (iop->cnt != EOF) {
+ iop->end += iop->cnt;
+ *iop->end = rs; /* reset the sentinel */
+ }
+ }
+/* buffers are now setup and filled with data */
+/* search for RS, #1, regexp based, or RS = "" */
+ /*
+ * Attempt to simplify the code a bit. The case where
+ * RS = "" can also be described by a regexp, RS = "\n\n+".
+ * The buffer management and searching code can thus now
+ * use a common case (the one for regexps) both when RS is
+ * a regexp, and when RS = "". This particularly benefits
+ * us for keeping track of how many newlines were matched
+ * in order to set RT.
+ */
+ if (! do_traditional && RSre != NULL) /* regexp */
+ rsre = RSre;
+ else if (grRS == FALSE) /* RS = "" */
+ rsre = RS_null_re;
+ else
+ rsre = NULL;
+
+ /*
+ * Look for regexp match of RS. Non-match conditions are:
+ * 1. No match at all
+ * 2. Match of a null string
+ * 3. Match ends at exact end of buffer
+ * Number 3 is subtle; we have to add more to the buffer
+ * in case the match would have extended further into the
+ * file, since regexp match by definition always matches the
+ * longest possible match.
+ *
+ * It is even more subtle than you might think. Suppose
+ * the re matches at exactly the end of file. We don't know
+ * that until we try to add more to the buffer. Thus, we
+ * set a flag to indicate, that if eof really does happen,
+ * don't break early.
+ */
+ continuing = FALSE;
+ if (rsre != NULL) {
+ again:
+ /* cases 1 and 2 are simple, just keep going */
+ if (research(rsre, start, 0, iop->end - start, TRUE) == -1
+ || RESTART(rsre, start) == REEND(rsre, start)) {
+ bp = iop->end;
+ continue;
+ }
+ /* case 3, regex match at exact end */
+ if (start + REEND(rsre, start) >= iop->end) {
+ if (iop->cnt != EOF) {
+ bp = iop->end;
+ continuing = continued = TRUE;
+ continue;
+ }
+ }
+ /* got a match! */
+ /*
+ * Leading newlines at the beginning of the file
+ * should be ignored. Whew!
+ */
+ if (grRS == FALSE && RESTART(rsre, start) == 0) {
+ start += REEND(rsre, start);
+ goto again;
+ }
+ bp = start + RESTART(rsre, start);
+ set_RT(bp, REEND(rsre, start) - RESTART(rsre, start));
+ *bp = '\0';
+ iop->off = start + REEND(rsre, start);
+ break;
+ }
+/* search for RS, #2, RS = <single char> */
+ if (onecase) {
+ while (casetable[(int) *bp++] != rs) /* NOTE(review): index is negative if char is signed and *bp >= 0x80 -- confirm casetable copes */
+ continue;
+ } else {
+ while (*bp++ != rs) /* the sentinel guarantees this loop stops */
+ continue;
+ }
+ set_RT(bp - 1, 1);
+
+ if (bp <= iop->end) /* a real separator, not the sentinel */
+ break;
+ else
+ bp--;
+
+ if ((iop->flag & IOP_IS_INTERNAL) != 0)
+ iop->cnt = bp - start;
+ }
+ if (iop->cnt == EOF
+ && (((iop->flag & IOP_IS_INTERNAL) != 0)
+ || (start == bp && ! continued))) {
+ *out = NULL;
+ set_RT_to_null();
+ return EOF;
+ }
+
+ if (do_traditional || rsre == NULL) {
+ char *bstart;
+
+ bstart = iop->off = bp;
+ bp--;
+ if (onecase ? casetable[(int) *bp] != rs : *bp != rs) {
+ bp++;
+ bstart = bp;
+ }
+ *bp = '\0';
+ } else if (grRS == FALSE && iop->cnt == EOF) {
+ /*
+ * special case, delete trailing newlines,
+ * should never be more than one.
+ */
+ while (bp[-1] == '\n')
+ bp--;
+ *bp = '\0';
+ }
+
+ *out = start;
+ return bp - start;
+}
+
+#ifdef TEST
+/*
+ * main --- stand-alone test driver for get_a_record
+ *
+ * Copies standard input to standard output one record at a time.
+ * Optional arguments: buffer size, then the record separator character.
+ */
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	IOBUF *input;
+	char *record;
+	int len;
+	char sep[2];
+
+	sep[0] = '\0';
+	if (argc > 1)
+		bufsize = atoi(argv[1]);
+	if (argc > 2)
+		sep[0] = *argv[2];
+
+	input = iop_alloc(0, "stdin", NULL);
+	for (;;) {
+		len = get_a_record(&record, input, sep[0], NULL, NULL);
+		if (len <= 0)
+			break;
+		fwrite(record, 1, len, stdout);
+		fwrite(sep, 1, 1, stdout);
+	}
+	return 0;
+}
+#endif
+
+#ifdef HAVE_MMAP
+/*
+ * mmap_get_record --- pull a record out of a memory-mapped file
+ *
+ * Takes the same arguments as get_a_record.  *out is pointed at the
+ * record inside the mapping and the record length is returned, or EOF
+ * once the mapping is used up (iop->cnt is then set to EOF as well).
+ * RT is set to the text that ended the record, or to the null string.
+ * errcode is not used, since nothing is read here.
+ */
+
+static int
+mmap_get_record(out, iop, grRS, RSre, errcode)
+char **out;	/* pointer to pointer to data */
+IOBUF *iop;	/* input IOP */
+register int grRS;	/* first char in RS->stptr */
+Regexp *RSre;	/* regexp for RS */
+int *errcode;	/* pointer to error variable */
+{
+	register char *bp = iop->off;
+	char *start = iop->off;			/* beginning of record */
+	int rs;
+	static Regexp *RS_null_re = NULL;
+	Regexp *rsre = NULL;
+	int onecase;
+	register char *end = iop->end;
+	int cnt;
+
+	/* first time through */
+	if (RS_null_re == NULL) {
+		RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE);
+		if (RS_null_re == NULL)
+			fatal("internal error: file `%s', line %d\n",
+				__FILE__, __LINE__);
+	}
+
+	if (iop->off >= iop->end) {	/* previous record was last */
+		*out = NULL;
+		set_RT_to_null();
+		iop->cnt = EOF;	/* tested by higher level code */
+		return EOF;
+	}
+
+	if (grRS == FALSE)	/* special case:  RS == "" */
+		rs = '\n';
+	else
+		rs = (char) grRS;
+
+	onecase = (IGNORECASE && isalpha(rs));
+	if (onecase)
+		rs = casetable[rs];
+
+	/* if RS = "", skip leading newlines at the front of the file */
+	if (grRS == FALSE && iop->off == iop->buf) {
+		/* there is no sentinel after the mapping, so test the bound too */
+		for (bp = iop->off; bp < end && *bp == '\n'; bp++)
+			continue;
+
+		if (bp != iop->off)
+			iop->off = start = bp;
+
+		if (start >= end) {	/* the file held nothing but newlines */
+			*out = NULL;
+			set_RT_to_null();
+			iop->cnt = EOF;
+			return EOF;
+		}
+	}
+
+	/*
+	 * Regexp based searching. Either RS = "" or RS = <regex>
+	 * See comments in get_a_record.
+	 */
+	if (! do_traditional && RSre != NULL)	/* regexp */
+		rsre = RSre;
+	else if (grRS == FALSE)		/* RS = "" */
+		rsre = RS_null_re;
+	else
+		rsre = NULL;
+
+	/*
+	 * Look for regexp match of RS.  Non-match conditions are:
+	 *	1. No match at all
+	 *	2. Match of a null string
+	 *	3. Match ends at exact end of buffer
+	 *
+	 * #1 means that the record ends the file
+	 * and there is no text that actually matched RS.
+	 *
+	 * #2: is probably like #1.
+	 *
+	 * #3 is simple;  since we have the whole file mapped, it's
+	 * the last record in the file.
+	 */
+	if (rsre != NULL) {
+		if (research(rsre, start, 0, iop->end - start, TRUE) == -1
+		    || RESTART(rsre, start) == REEND(rsre, start)) {
+			/* no matching text, we have the record */
+			*out = start;
+			iop->off = iop->end;	/* all done with the record */
+			set_RT_to_null();
+			/* special case, don't allow trailing newlines */
+			if (grRS == FALSE && *(iop->end - 1) == '\n')
+				return iop->end - start - 1;
+			else
+				return iop->end - start;
+
+		}
+		/* have a match */
+		*out = start;
+		bp = start + RESTART(rsre, start);
+		set_RT(bp, REEND(rsre, start) - RESTART(rsre, start));
+		*bp = '\0';
+		iop->off = start + REEND(rsre, start);
+		return bp - start;
+	}
+
+	/*
+	 * RS = "?", i.e., one character based searching.
+	 *
+	 * Alas, we can't just plug the sentinel character in at
+	 * the end of the mmapp'ed file ( *(iop->end) = rs; ). This
+	 * works if we're lucky enough to have a file that does not
+	 * take up all of its last disk block. But if we end up with
+	 * file whose size is an even multiple of the disk block size,
+	 * assigning past the end of it delivers a SIGBUS. So, we have to
+	 * add the extra test in the while loop at the front that looks
+	 * for going past the end of the mapped object. Sigh.
+	 */
+	/* search for RS, #2, RS = <single char> */
+	if (onecase) {
+		while (bp < end && casetable[*bp++] != rs)
+			continue;
+	} else {
+		while (bp < end && *bp++ != rs)
+			continue;
+	}
+	cnt = (bp - start) - 1;
+	if (bp >= iop->end) {
+		/*
+		 * at end, may have actually seen rs, or may not.
+		 * Fold the last character the same way the scan above
+		 * does; rs itself has already been folded.
+		 */
+		if ((onecase ? casetable[*(bp-1)] : *(bp-1)) == rs)
+			set_RT(bp - 1, 1);	/* real RS seen */
+		else {
+			cnt++;
+			set_RT_to_null();
+		}
+	} else
+		set_RT(bp - 1, 1);
+
+	iop->off = bp;
+	*out = start;
+	return cnt;
+}
+#endif /* HAVE_MMAP */
+
+/*
+ * set_RS --- update things as appropriate when RS is set
+ *
+ * Does nothing if RS compares equal to the value seen on the previous
+ * call.  Otherwise refreshes the cached RS string, the RS_is_null flag
+ * and the compiled RS_regexp (used only when RS is longer than one
+ * character), then calls set_FS_if_not_FIELDWIDTHS().
+ */
+
+void
+set_RS()
+{
+	static NODE *save_rs = NULL;	/* RS as of the previous call */
+
+	if (save_rs != NULL && cmp_nodes(RS_node->var_value, save_rs) == 0)
+		return;		/* unchanged */
+
+	unref(save_rs);
+	save_rs = dupnode(RS_node->var_value);
+
+	RS_is_null = FALSE;
+	RS = force_string(RS_node->var_value);
+
+	/* throw away the regexp built for the old value */
+	if (RS_regexp != NULL) {
+		refree(RS_regexp);
+		RS_regexp = NULL;
+	}
+
+	if (RS->stlen > 1)
+		RS_regexp = make_regexp(RS->stptr, RS->stlen, IGNORECASE, TRUE);
+	else if (RS->stlen == 0)
+		RS_is_null = TRUE;
+
+	set_FS_if_not_FIELDWIDTHS();
+}
diff --git a/contrib/awk/main.c b/contrib/awk/main.c
new file mode 100644
index 0000000..92445de
--- /dev/null
+++ b/contrib/awk/main.c
@@ -0,0 +1,735 @@
+/*
+ * main.c -- Expression tree constructors and main program for gawk.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+#include "getopt.h"
+#include "patchlevel.h"
+
+static void usage P((int exitval, FILE *fp)); /* P() wraps prototype argument lists -- presumably empty for pre-ANSI compilers; confirm in awk.h */
+static void copyleft P((void));
+static void cmdline_fs P((char *str));
+static void init_args P((int argc0, int argc, char *argv0, char **argv));
+static void init_vars P((void));
+static void pre_assign P((char *v));
+RETSIGTYPE catchsig P((int sig, int code));
+static void nostalgia P((void));
+static void version P((void));
+
+/* These nodes store all the special variables AWK uses */
+NODE *ARGC_node, *ARGIND_node, *ARGV_node, *CONVFMT_node, *ENVIRON_node;
+NODE *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node, *FS_node;
+NODE *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node;
+NODE *ORS_node, *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
+
+long NF;
+long NR; /* incremented by do_getline for each record read without redirection */
+long FNR; /* incremented together with NR */
+int IGNORECASE;
+char *OFS;
+char *ORS;
+char *OFMT;
+
+/*
+ * CONVFMT is a convenience pointer for the current number to string format.
+ * We must supply an initial value to avoid recursion problems of
+ * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT
+ * Fun, fun, fun, fun.
+ */
+char *CONVFMT = "%.6g";
+
+int errcount = 0; /* error counter, used by yyerror() */
+
+NODE *Nnull_string; /* The global null string */
+
+/* The name the program was invoked under, for error messages */
+const char *myname;
+
+/* A block of AWK code to be run before running the program */
+NODE *begin_block = NULL;
+
+/* A block of AWK code to be run after the last input file */
+NODE *end_block = NULL;
+
+int exiting = FALSE; /* Was an "exit" statement executed? */
+int exit_val = 0; /* optional exit value */
+
+#if defined(YYDEBUG) || defined(DEBUG)
+extern int yydebug;
+#endif
+
+struct src *srcfiles = NULL; /* source file name(s) */
+long numfiles = -1; /* index of the last entry used in srcfiles; -1 means none yet */
+
+int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */
+int do_posix = FALSE; /* turn off gnu and unix extensions */
+int do_lint = FALSE; /* provide warnings about questionable stuff */
+int do_lint_old = FALSE; /* warn about stuff not in V7 awk */
+int do_nostalgia = FALSE; /* provide a blast from the past */
+int do_intervals = FALSE; /* allow {...,...} in regexps */
+
+int in_begin_rule = FALSE; /* we're in a BEGIN rule */
+int in_end_rule = FALSE; /* we're in an END rule */
+
+int output_is_tty = FALSE; /* control flushing of output */
+
+extern char *version_string; /* current version, for printing */
+
+/* The parse tree is stored here. */
+NODE *expression_value;
+
+static struct option optab[] = { /* long options handed to getopt_long() by main() */
+ { "compat", no_argument, & do_traditional, 1 }, /* entries with a flag pointer just set that variable to 1 */
+ { "traditional", no_argument, & do_traditional, 1 },
+ { "lint", no_argument, & do_lint, 1 },
+ { "lint-old", no_argument, & do_lint_old, 1 },
+ { "posix", no_argument, & do_posix, 1 },
+ { "nostalgia", no_argument, & do_nostalgia, 1 },
+ { "copyleft", no_argument, NULL, 'C' }, /* entries with a NULL flag return the letter for main()'s switch */
+ { "copyright", no_argument, NULL, 'C' },
+ { "field-separator", required_argument, NULL, 'F' },
+ { "file", required_argument, NULL, 'f' },
+ { "re-interval", no_argument, & do_intervals, 1 },
+ { "source", required_argument, NULL, 's' },
+ { "assign", required_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' },
+ { "usage", no_argument, NULL, 'u' },
+ { "help", no_argument, NULL, 'u' },
+#ifdef DEBUG
+ { "parsedebug", no_argument, NULL, 'D' },
+#endif
+ { NULL, 0, NULL, '\0' } /* end-of-table marker */
+};
+
+/* main --- process args, parse program, run it, clean up
+ *
+ * Order of events: install signal handlers, create the null string,
+ * set up the regex routines and the special variables, process the
+ * command-line options, pick the program text (from -f/--source or the
+ * first remaining argument), build ARGV, parse the program, then run
+ * the BEGIN block, the main input loop and the END block.
+ *
+ * Exits with exit_val, or with 1 if exit_val is 0 but close_io()
+ * reports a failure, or if the program fails to parse.
+ */
+
+int
+main(argc, argv)
+int argc;
+char **argv;
+{
+ int c;
+ char *scan;
+ /* the + on the front tells GNU getopt not to rearrange argv */
+ const char *optlist = "+F:f:v:W;m:";
+ int stopped_early = FALSE;
+ int old_optind;
+ extern int optind;
+ extern int opterr;
+ extern char *optarg;
+
+ setlocale(LC_CTYPE, "");
+ setlocale(LC_COLLATE, "");
+
+ (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig);
+ (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig);
+#ifdef SIGBUS
+ (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig);
+#endif
+
+ myname = gawk_name(argv[0]);
+ argv[0] = (char *) myname;
+ os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
+
+ /* remove sccs gunk */
+ if (strncmp(version_string, "@(#)", 4) == 0)
+ version_string += 4;
+
+ if (argc < 2)
+ usage(1, stderr);
+
+ /* initialize the null string */
+ Nnull_string = make_string("", 0);
+ Nnull_string->numbr = 0.0;
+ Nnull_string->type = Node_val;
+ Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
+
+ /*
+ * Tell the regex routines how they should work.
+ * Do this before initializing variables, since
+ * they could want to do a regexp compile.
+ */
+ resetup();
+
+ /* Set up the special variables */
+ /*
+ * Note that this must be done BEFORE arg parsing else -F
+ * breaks horribly
+ */
+ init_vars();
+
+ /* worst case */
+ emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main");
+ memset(srcfiles, '\0', argc * sizeof(struct src));
+
+ /* we do error messages ourselves on invalid options */
+ opterr = FALSE;
+
+ /* option processing. ready, set, go! */
+ for (optopt = 0, old_optind = 1;
+ (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
+ optopt = 0, old_optind = optind) {
+ if (do_posix)
+ opterr = TRUE;
+
+ switch (c) {
+ case 'F':
+ cmdline_fs(optarg);
+ break;
+
+ case 'f':
+ /*
+ * a la MKS awk, allow multiple -f options.
+ * this makes function libraries real easy.
+ * most of the magic is in the scanner.
+ *
+ * The following is to allow for whitespace at the end
+ * of a #! /bin/gawk line in an executable file
+ */
+ scan = optarg;
+ while (ISSPACE(*scan))
+ scan++;
+
+ ++numfiles;
+ srcfiles[numfiles].stype = SOURCEFILE;
+ if (*scan == '\0') /* option argument was only whitespace */
+ srcfiles[numfiles].val = argv[optind++]; /* NOTE(review): argv[optind] is NULL when no argument follows -- confirm later code copes */
+ else
+ srcfiles[numfiles].val = optarg;
+ break;
+
+ case 'v':
+ pre_assign(optarg);
+ break;
+
+ case 'm':
+ /*
+ * Research awk extension.
+ * -mf nnn set # fields, gawk ignores
+ * -mr nnn set record length, ditto
+ */
+ if (do_lint)
+ warning("-m[fr] option irrelevant in gawk");
+ if (optarg[0] != 'r' && optarg[0] != 'f')
+ warning("-m option usage: `-m[fr] nnn'");
+ if (optarg[1] == '\0') /* nnn is a separate argument; step over it */
+ optind++;
+ break;
+
+ case 'W': /* gawk specific options - now in getopt_long */
+ fprintf(stderr, "%s: option `-W %s' unrecognized, ignored\n",
+ argv[0], optarg);
+ break;
+
+ /* These can only come from long form options */
+ case 'C':
+ copyleft();
+ break;
+
+ case 's':
+ if (optarg[0] == '\0')
+ warning("empty argument to --source ignored");
+ else {
+ srcfiles[++numfiles].stype = CMDLINE;
+ srcfiles[numfiles].val = optarg;
+ }
+ break;
+
+ case 'u':
+ usage(0, stdout); /* per coding stds */
+ break;
+
+ case 'V':
+ version();
+ break;
+
+#ifdef DEBUG
+ case 'D':
+ yydebug = 2;
+ break;
+#endif
+
+ case 0:
+ /*
+ * getopt_long found an option that sets a variable
+ * instead of returning a letter. Do nothing, just
+ * cycle around for the next one.
+ */
+ break;
+
+ case '?':
+ default:
+ /*
+ * New behavior. If not posix, an unrecognized
+ * option stops argument processing so that it can
+ * go into ARGV for the awk program to see. This
+ * makes use of ``#! /bin/gawk -f'' easier.
+ *
+ * However, it's never simple. If optopt is set,
+ * an option that requires an argument didn't get the
+ * argument. We care because if opterr is 0, then
+ * getopt_long won't print the error message for us.
+ */
+ if (! do_posix
+ && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
+ /*
+ * can't just do optind--. In case of an
+ * option with >= 2 letters, getopt_long
+ * won't have incremented optind.
+ */
+ optind = old_optind;
+ stopped_early = TRUE;
+ goto out;
+ } else if (optopt != '\0')
+ /* Use 1003.2 required message format */
+ fprintf(stderr,
+ "%s: option requires an argument -- %c\n",
+ myname, optopt);
+ /* else
+ let getopt print error message for us */
+ break;
+ }
+ }
+out:
+
+ if (do_nostalgia)
+ nostalgia();
+
+ /* check for POSIXLY_CORRECT environment variable */
+ if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
+ do_posix = TRUE;
+ if (do_lint)
+ warning(
+ "environment variable `POSIXLY_CORRECT' set: turning on --posix");
+ }
+
+ if (do_posix) {
+ if (do_traditional) /* both on command line */
+ warning("--posix overrides --traditional");
+ else
+ do_traditional = TRUE;
+ /*
+ * POSIX compliance also implies
+ * no GNU extensions either.
+ */
+ }
+
+ /*
+ * Tell the regex routines how they should work.
+ * Do this again, after argument processing, since do_posix
+ * and do_traditional are now paid attention to by resetup().
+ */
+ if (do_traditional || do_posix || do_intervals) {
+ resetup();
+
+ /* now handle RS and FS. have to be careful with FS */
+ set_RS();
+ if (using_fieldwidths()) {
+ set_FS();
+ set_FIELDWIDTHS();
+ } else
+ set_FS();
+ }
+
+#ifdef DEBUG
+ setbuf(stdout, (char *) NULL); /* make debugging easier */
+#endif
+ if (isatty(fileno(stdout)))
+ output_is_tty = TRUE;
+ /* No -f or --source options, use next arg */
+ if (numfiles == -1) {
+ if (optind > argc - 1 || stopped_early) /* no args left or no program */
+ usage(1, stderr);
+ srcfiles[++numfiles].stype = CMDLINE;
+ srcfiles[numfiles].val = argv[optind];
+ optind++;
+ }
+
+ init_args(optind, argc, (char *) myname, argv);
+ (void) tokexpand();
+
+ /* Read in the program */
+ if (yyparse() != 0 || errcount != 0)
+ exit(1);
+ /* recover any space from C based alloca */
+#ifdef C_ALLOCA
+ (void) alloca(0);
+#endif
+
+ /* Set up the field variables */
+ init_fields();
+
+ if (do_lint && begin_block == NULL && expression_value == NULL
+ && end_block == NULL)
+ warning("no program");
+
+ if (begin_block != NULL) {
+ in_begin_rule = TRUE;
+ (void) interpret(begin_block);
+ }
+ in_begin_rule = FALSE;
+ if (! exiting && (expression_value != NULL || end_block != NULL)) /* input is read only if a main rule or END block exists */
+ do_input();
+ if (end_block != NULL) {
+ in_end_rule = TRUE;
+ (void) interpret(end_block);
+ }
+ in_end_rule = FALSE;
+ if (close_io() != 0 && exit_val == 0) /* a failed close turns a zero exit status into 1 */
+ exit_val = 1;
+ exit(exit_val); /* more portable */
+ return exit_val; /* to suppress warnings */
+}
+
+/*
+ * usage --- print usage information and exit
+ *
+ * Writes the synopsis and the option table to fp, then exits with exitval.
+ */
+
+static void
+usage(exitval, fp)
+int exitval;
+FILE *fp;
+{
+	/* GNU long options info.  Gack. */
+	static char *optlines[] = {
+		"POSIX options:\t\tGNU long options:\n",
+		"\t-f progfile\t\t--file=progfile\n",
+		"\t-F fs\t\t\t--field-separator=fs\n",
+		"\t-v var=val\t\t--assign=var=val\n",
+		"\t-m[fr] val\n",
+		"\t-W compat\t\t--compat\n",
+		"\t-W copyleft\t\t--copyleft\n",
+		"\t-W copyright\t\t--copyright\n",
+		"\t-W help\t\t\t--help\n",
+		"\t-W lint\t\t\t--lint\n",
+		"\t-W lint-old\t\t--lint-old\n",
+#ifdef NOSTALGIA
+		"\t-W nostalgia\t\t--nostalgia\n",
+#endif
+#ifdef DEBUG
+		"\t-W parsedebug\t\t--parsedebug\n",
+#endif
+		"\t-W posix\t\t--posix\n",
+		"\t-W re-interval\t\t--re-interval\n",
+		"\t-W source=program-text\t--source=program-text\n",
+		"\t-W traditional\t\t--traditional\n",
+		"\t-W usage\t\t--usage\n",
+		"\t-W version\t\t--version\n",
+		"\nReport bugs to bug-gnu-utils@prep.ai.mit.edu,\n",
+		"with a Cc: to arnold@gnu.ai.mit.edu\n",
+		NULL
+	};
+	char *progfile_form = " -f progfile [--]";
+	char *options_form = " [POSIX or GNU style options]";
+	char **lp;
+
+	fprintf(fp, "Usage: %s%s%s file ...\n\t%s%s [--] %cprogram%c file ...\n",
+		myname, options_form, progfile_form, myname, options_form,
+		quote, quote);
+
+	for (lp = optlines; *lp != NULL; lp++)
+		fputs(*lp, fp);
+
+	exit(exitval);
+}
+
+/*
+ * copyleft --- print out the short GNU copyright information
+ *
+ * Writes the notice to standard output and exits with status 0.
+ */
+
+static void
+copyleft()
+{
+	/* multiple blurbs are needed for some brain dead compilers. */
+	static char terms[] =
+"Copyright (C) 1989, 1991-1997 Free Software Foundation.\n\
+\n\
+This program is free software; you can redistribute it and/or modify\n\
+it under the terms of the GNU General Public License as published by\n\
+the Free Software Foundation; either version 2 of the License, or\n\
+(at your option) any later version.\n\
+\n";
+	static char warranty[] =
+"This program is distributed in the hope that it will be useful,\n\
+but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
+GNU General Public License for more details.\n\
+\n";
+	static char license_copy[] =
+"You should have received a copy of the GNU General Public License\n\
+along with this program; if not, write to the Free Software\n\
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.\n";
+	static char *blurbs[] = { terms, warranty, license_copy };
+	int i;
+
+	for (i = 0; i < (int) (sizeof(blurbs) / sizeof(blurbs[0])); i++)
+		fputs(blurbs[i], stdout);
+	fflush(stdout);
+	exit(0);
+}
+
+ /* cmdline_fs --- install the argument of -F as the value of FS */
+
+ static void
+ cmdline_fs(str)
+ char *str;
+ {
+     register NODE **fs_lhs;
+     int is_bare_t;
+
+     fs_lhs = get_lhs(FS_node, (Func_ptr *) 0);
+     unref(*fs_lhs);
+
+     /*
+      * The awk book documents -F\t, but the shell strips the backslash
+      * and hands us -Ft.  Only in traditional (and not POSIX) mode is a
+      * lone "t" turned into a tab; POSIX did not propagate this
+      * "feature".  With lint on, the case is always reported.
+      */
+     is_bare_t = (str[0] == 't' && str[1] == '\0');
+     if (is_bare_t && do_lint)
+         warning("-Ft does not set FS to tab in POSIX awk");
+     if (is_bare_t && do_traditional && ! do_posix)
+         str[0] = '\t';
+
+     /* SCAN: escape sequences in the separator are processed */
+     *fs_lhs = make_str_node(str, strlen(str), SCAN);
+     set_FS();
+ }
+
+ /*
+  * init_args --- build ARGV and ARGC from the command line
+  *
+  * ARGV[0] is argv0; argv[argc0] .. argv[argc-1] become ARGV[1], ARGV[2], ...
+  * ARGC is set to the number of ARGV elements created.
+  */
+
+ static void
+ init_args(argc0, argc, argv0, argv)
+ int argc0, argc;
+ char *argv0;
+ char **argv;
+ {
+     int src;        /* index into argv */
+     int slot;       /* next ARGV subscript to fill */
+     NODE **elem;
+
+     ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL));
+
+     elem = assoc_lookup(ARGV_node, tmp_number(0.0));
+     *elem = make_string(argv0, strlen(argv0));
+     (*elem)->flags |= MAYBE_NUM;
+
+     slot = 1;
+     for (src = argc0; src < argc; src++, slot++) {
+         elem = assoc_lookup(ARGV_node, tmp_number((AWKNUM) slot));
+         *elem = make_string(argv[src], strlen(argv[src]));
+         (*elem)->flags |= MAYBE_NUM;
+     }
+
+     ARGC_node = install("ARGC",
+         node(make_number((AWKNUM) slot), Node_var, (NODE *) NULL));
+ }
+
+/*
+ * Set all the special variables to their initial values.
+ * Note that some of the variables that have set_FOO routines should
+ * *N*O*T* have those routines called upon initialization, and thus
+ * they have NULL entries in that field. This is notably true of FS
+ * and IGNORECASE.
+ */
+struct varinit {
+ NODE **spec;
+ const char *name;
+ NODETYPE type;
+ const char *strval;
+ AWKNUM numval;
+ Func_ptr assign;
+};
+static struct varinit varinit[] = {
+{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT },
+{&NF_node, "NF", Node_NF, NULL, -1, set_NF },
+{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL },
+{&NR_node, "NR", Node_NR, NULL, 0, set_NR },
+{&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR },
+{&FS_node, "FS", Node_FS, " ", 0, NULL },
+{&RS_node, "RS", Node_RS, "\n", 0, set_RS },
+{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL },
+{&FILENAME_node, "FILENAME", Node_var, "", 0, NULL },
+{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS },
+{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS },
+{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT },
+{&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL },
+{&RSTART_node, "RSTART", Node_var, NULL, 0, NULL },
+{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL },
+{&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL },
+{&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL },
+{&RT_node, "RT", Node_var, "", 0, NULL },
+{0, NULL, Node_illegal, NULL, 0, NULL },
+};
+
+ /*
+  * init_vars --- install every variable listed in varinit[] into the
+  * symbol table, mark it SCALAR, and run its assign hook if it has one.
+  */
+
+ static void
+ init_vars()
+ {
+     register struct varinit *entry = varinit;
+     NODE *initial;
+
+     while (entry->name != NULL) {
+         /* a string initializer takes precedence over the numeric one */
+         if (entry->strval != NULL)
+             initial = make_string((char *) entry->strval,
+                     strlen(entry->strval));
+         else
+             initial = make_number(entry->numval);
+
+         *(entry->spec) = install((char *) entry->name,
+                 node(initial, entry->type, (NODE *) NULL));
+         (*(entry->spec))->flags |= SCALAR;
+
+         if (entry->assign != NULL)
+             (*(entry->assign))();
+         entry++;
+     }
+ }
+
+ /*
+  * load_environ --- create ENVIRON and fill it from environ[]
+  *
+  * Each "name=value" entry is split at its first '=' while it is copied
+  * and is put back together afterwards.  An entry without '=' gets an
+  * empty value.
+  */
+
+ void
+ load_environ()
+ {
+ #if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC))
+     extern char **environ;
+ #endif
+     static char nullstr[] = "";
+     register char *name, *value, *eq;
+     NODE **elem;
+     register int idx;
+
+     ENVIRON_node = install("ENVIRON",
+             node(Nnull_string, Node_var, (NODE *) NULL));
+
+     for (idx = 0; environ[idx] != NULL; idx++) {
+         name = environ[idx];
+         eq = strchr(name, '=');
+         if (eq == NULL)
+             value = nullstr;
+         else {
+             *eq = '\0';     /* temporarily terminate the name */
+             value = eq + 1;
+         }
+
+         elem = assoc_lookup(ENVIRON_node, tmp_string(name, strlen(name)));
+         *elem = make_string(value, strlen(value));
+         (*elem)->flags |= (MAYBE_NUM|SCALAR);
+
+         /* restore '=' so that system() gets a valid environment */
+         if (eq != NULL)
+             *eq = '=';
+     }
+
+     /*
+      * Put AWKPATH into ENVIRON if it's not there.
+      * This allows querying it from outside gawk.
+      */
+     if (getenv("AWKPATH") == NULL) {
+         elem = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7));
+         *elem = make_string(defpath, strlen(defpath));
+         (*elem)->flags |= SCALAR;
+     }
+ }
+
+ /*
+  * arg_assign --- process a command-line assignment
+  *
+  * arg is examined for the form "name=value".  When there is no '=',
+  * nothing is done and NULL is returned.  Otherwise the name is checked
+  * (a letter or '_' followed by letters, digits or '_'; anything else is
+  * fatal), the value is stored in the variable with escape sequences
+  * expanded, and the variable's after-assignment hook is run when it has
+  * one.  arg is modified while this happens but is restored before
+  * returning; the return value points at the '=' inside arg.
+  */
+
+ char *
+ arg_assign(arg)
+ char *arg;
+ {
+     char *cp, *cp2;
+     int badvar;
+     Func_ptr after_assign = NULL;
+     NODE *var;
+     NODE *it;
+     NODE **lhs;
+
+     cp = strchr(arg, '=');
+     if (cp == NULL)
+         return NULL;
+
+     *cp++ = '\0';
+
+     /*
+      * First check that the variable name has valid syntax.  The
+      * <ctype.h> functions are only defined for EOF and values
+      * representable as unsigned char, so convert before the call;
+      * a plain char holding a byte >= 0x80 may be negative.
+      */
+     badvar = FALSE;
+     if (! isalpha((unsigned char) arg[0]) && arg[0] != '_')
+         badvar = TRUE;
+     else
+         for (cp2 = arg+1; *cp2; cp2++)
+             if (! isalnum((unsigned char) *cp2) && *cp2 != '_') {
+                 badvar = TRUE;
+                 break;
+             }
+     if (badvar)
+         fatal("illegal name `%s' in variable assignment", arg);
+
+     /*
+      * Recent versions of nawk expand escapes inside assignments.
+      * This makes sense, so we do it too.
+      */
+     it = make_str_node(cp, strlen(cp), SCAN);
+     it->flags |= (MAYBE_NUM|SCALAR);
+     var = variable(arg, FALSE, Node_var);
+     lhs = get_lhs(var, &after_assign);
+     unref(*lhs);
+     *lhs = it;
+     if (after_assign != NULL)
+         (*after_assign)();
+     *--cp = '=';    /* restore original text of ARGV */
+
+     return cp;
+ }
+
+ /*
+  * pre_assign --- perform a -v assignment; if the argument is not of
+  * the form var=value, complain and leave through usage().
+  */
+
+ static void
+ pre_assign(v)
+ char *v;
+ {
+     if (arg_assign(v) != NULL)
+         return;     /* assignment was carried out */
+
+     fprintf(stderr,
+         "%s: `%s' argument to `-v' not in `var=value' form\n",
+         myname, v);
+     usage(1, stderr);
+ }
+
+ /*
+  * catchsig --- catch signals
+  *
+  * SIGFPE is reported through fatal().  SIGSEGV (and SIGBUS where it is
+  * defined) records this file/line with set_loc(), prints an internal
+  * error message and calls abort().  Any other signal number goes to
+  * cant_happen().  The code argument is not used outside the lint-only
+  * assignments below.
+  */
+
+ RETSIGTYPE
+ catchsig(sig, code)
+ int sig, code;
+ {
+ #ifdef lint
+ code = 0; sig = code; code = sig; /* lint builds only: mark both arguments as used */
+ #endif
+ if (sig == SIGFPE) {
+ fatal("floating point exception");
+ } else if (sig == SIGSEGV
+ #ifdef SIGBUS
+ || sig == SIGBUS
+ #endif
+ ) {
+ set_loc(__FILE__, __LINE__);
+ msg("fatal error: internal error");
+ /* fatal won't abort() if not compiled for debugging */
+ abort();
+ } else
+ cant_happen();
+ /* NOTREACHED */
+ }
+
+ /* nostalgia --- emit the classic awk failure message on stderr, then abort */
+
+ static void
+ nostalgia()
+ {
+     fputs("awk: bailing out near line 1\n", stderr);
+     abort();
+ }
+
+ /* version --- print "<version_string>.<PATCHLEVEL>" and the copyright notice */
+
+ static void
+ version()
+ {
+     fprintf(stdout, "%s.%d\n", version_string, PATCHLEVEL);
+
+     /*
+      * The GNU coding standards ask for the copyright information
+      * followed by a successful exit and nothing more.
+      */
+     copyleft();
+     exit(0);
+ }
diff --git a/contrib/awk/missing.c b/contrib/awk/missing.c
new file mode 100644
index 0000000..7494d76
--- /dev/null
+++ b/contrib/awk/missing.c
@@ -0,0 +1,59 @@
+/*
+ * Do all necessary includes here, so that we don't have to worry about
+ * overlapping includes in the files in missing.d.
+ */
+#include "awk.h"
+
+
+#ifdef atarist
+/*
+ * this will work with gcc compiler - for other compilers you may
+ * have to replace path separators in this file into backslashes
+ */
+#include "atari/stack.c"
+#include "atari/tmpnam.c"
+#endif /* atarist */
+
+#ifndef HAVE_SYSTEM
+#ifdef atarist
+#include "atari/system.c"
+#else
+#include "missing/system.c"
+#endif
+#endif /* HAVE_SYSTEM */
+
+#ifndef HAVE_MEMCMP
+#include "missing/memcmp.c"
+#endif /* HAVE_MEMCMP */
+
+#ifndef HAVE_MEMCPY
+#include "missing/memcpy.c"
+#endif /* HAVE_MEMCPY */
+
+#ifndef HAVE_MEMSET
+#include "missing/memset.c"
+#endif /* HAVE_MEMSET */
+
+#ifndef HAVE_STRNCASECMP
+#include "missing/strncasecmp.c"
+ #endif /* HAVE_STRNCASECMP */
+
+#ifndef HAVE_STRERROR
+#include "missing/strerror.c"
+#endif /* HAVE_STRERROR */
+
+#ifndef HAVE_STRFTIME
+#include "missing/strftime.c"
+#endif /* HAVE_STRFTIME */
+
+#ifndef HAVE_STRCHR
+#include "missing/strchr.c"
+#endif /* HAVE_STRCHR */
+
+#ifndef HAVE_STRTOD
+#include "missing/strtod.c"
+#endif /* HAVE_STRTOD */
+
+#ifndef HAVE_TZSET
+#include "missing/tzset.c"
+#endif /* HAVE_TZSET */
diff --git a/contrib/awk/msg.c b/contrib/awk/msg.c
new file mode 100644
index 0000000..82fa422
--- /dev/null
+++ b/contrib/awk/msg.c
@@ -0,0 +1,189 @@
+/*
+ * msg.c - routines for error messages
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+
+ int sourceline = 0; /* when nonzero, err() prints it as the program line number */
+ char *source = NULL; /* program file name printed by err(); NULL prints "cmd. line:" */
+
+ static char *srcfile = NULL; /* file recorded by set_loc(); printed by err() in DEBUG builds */
+ static int srcline; /* line recorded by set_loc(), printed together with srcfile */
+
+/* prototype needed for ansi / gcc */
+void err P((const char *s, const char *emsg, va_list argp));
+
+ /*
+  * err --- print an error message with source line and file and record
+  *
+  * s     text printed verbatim in front of the message ("warning: " etc.)
+  * emsg  printf-style format of the message proper
+  * argp  arguments consumed by emsg
+  *
+  * stdout is flushed first.  The line written to stderr consists of the
+  * program name, the set_loc() position (DEBUG builds only), the awk
+  * source position when sourceline is nonzero, the FILENAME/FNR of the
+  * current record when FNR > 0, then s and the formatted message.
+  */
+
+ /* VARARGS2 */
+ void
+ err(s, emsg, argp)
+ const char *s;
+ const char *emsg;
+ va_list argp;
+ {
+     char *file;
+
+     (void) fflush(stdout);
+     (void) fprintf(stderr, "%s: ", myname);
+ #ifdef DEBUG
+     if (srcfile != NULL) {
+         fprintf(stderr, "%s:%d:", srcfile, srcline);
+         srcfile = NULL;     /* the recorded location is reported only once */
+     }
+ #endif /* DEBUG */
+     if (sourceline != 0) {
+         if (source != NULL)
+             (void) fprintf(stderr, "%s:", source);
+         else
+             (void) fprintf(stderr, "cmd. line:");
+
+         (void) fprintf(stderr, "%d: ", sourceline);
+     }
+     if (FNR > 0) {
+         file = FILENAME_node->var_value->stptr;
+         (void) putc('(', stderr);
+         if (file)
+             (void) fprintf(stderr, "FILENAME=%s ", file);
+         (void) fprintf(stderr, "FNR=%ld) ", FNR);
+     }
+     /* s is data, not a format: never hand it to printf as one */
+     (void) fputs(s, stderr);
+     vfprintf(stderr, emsg, argp);
+     (void) fprintf(stderr, "\n");
+     (void) fflush(stderr);
+ }
+
+ /*
+  * msg --- take a varargs error message and print it
+  *
+  * The first argument is a printf-style format; it and the remaining
+  * arguments are passed to err() with an empty prefix.  The <stdarg.h>
+  * form is compiled when HAVE_STDARG_H is defined and __STDC__ is
+  * nonzero; otherwise the va_alist/va_dcl form is used and the format
+  * is fetched as the first variable argument.
+  */
+
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ void
+ msg(char *mesg, ...)
+ #else
+ /*VARARGS0*/
+ void
+ msg(va_alist)
+ va_dcl
+ #endif
+ {
+ va_list args;
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+ #else
+ char *mesg;
+
+ va_start(args);
+ mesg = va_arg(args, char *);
+ #endif
+ err("", mesg, args);
+ va_end(args);
+ }
+
+ /*
+  * warning --- print a warning message
+  *
+  * Same calling convention as msg(); the text is passed to err() with
+  * the prefix "warning: ".  Nothing here terminates the program.
+  */
+
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ void
+ warning(char *mesg, ...)
+ #else
+ /*VARARGS0*/
+ void
+ warning(va_alist)
+ va_dcl
+ #endif
+ {
+ va_list args;
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+ #else
+ char *mesg;
+
+ va_start(args);
+ mesg = va_arg(args, char *);
+ #endif
+ err("warning: ", mesg, args);
+ va_end(args);
+ }
+ /*
+  * error --- print an error message
+  *
+  * Takes a printf-style format plus arguments and passes them to err()
+  * with the prefix "error: ".  The function returns to its caller; it
+  * does not exit.
+  */
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ void
+ error(char *mesg, ...)
+ #else
+ /*VARARGS0*/
+ void
+ error(va_alist)
+ va_dcl
+ #endif
+ {
+ va_list args;
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+ #else
+ char *mesg;
+
+ va_start(args);
+ mesg = va_arg(args, char *);
+ #endif
+ err("error: ", mesg, args);
+ va_end(args);
+ }
+
+ /*
+  * set_loc --- remember the C source file and line at which a fatal
+  * error is being raised.
+  */
+
+ void
+ set_loc(file, line)
+ char *file;
+ int line;
+ {
+     srcline = line;
+     srcfile = file;
+ }
+
+ /*
+  * fatal --- print an error message and die
+  *
+  * Defined under the name r_fatal.  The format and its arguments are
+  * passed to err() with the prefix "fatal: ".  Afterwards a DEBUG build
+  * calls abort(); any other build exits with status 2.
+  */
+
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ void
+ r_fatal(char *mesg, ...)
+ #else
+ /*VARARGS0*/
+ void
+ r_fatal(va_alist)
+ va_dcl
+ #endif
+ {
+ va_list args;
+ #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+ #else
+ char *mesg;
+
+ va_start(args);
+ mesg = va_arg(args, char *);
+ #endif
+ err("fatal: ", mesg, args);
+ va_end(args);
+ #ifdef DEBUG
+ abort();
+ #endif
+ exit(2);
+ }
+
+
diff --git a/contrib/awk/node.c b/contrib/awk/node.c
new file mode 100644
index 0000000..6f10b9f
--- /dev/null
+++ b/contrib/awk/node.c
@@ -0,0 +1,515 @@
+/*
+ * node.c -- routines for node management
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+
+ /*
+  * r_force_number --- force a value to be numeric
+  *
+  * Computes the numeric value of the string held in n, stores it in
+  * n->numbr, sets NUM in n->flags and returns the value.  A node that
+  * carried MAYBE_NUM loses that flag and gains NUMBER when the whole
+  * string (trailing white space allowed) was consumed as a number
+  * without error.  Empty strings and strings whose first non-blank
+  * character is a letter yield 0.
+  *
+  * The character just past the string (n->stptr[n->stlen]) is briefly
+  * overwritten with '\0' around the strtod() call and then restored.
+  */
+
+ AWKNUM
+ r_force_number(n)
+ register NODE *n;
+ {
+     register char *cp;
+     register char *cpend;
+     char save;
+     char *ptr;
+     unsigned int newflags;
+
+ #ifdef DEBUG
+     if (n == NULL)
+         cant_happen();
+     if (n->type != Node_val)
+         cant_happen();
+     if(n->flags == 0)
+         cant_happen();
+     if (n->flags & NUM)
+         return n->numbr;
+ #endif
+
+     /* all the conditionals are an attempt to avoid the expensive strtod */
+
+     n->numbr = 0.0;
+     n->flags |= NUM;
+
+     if (n->stlen == 0)
+         return 0.0;
+
+     cp = n->stptr;
+     if (ISALPHA(*cp))
+         return 0.0;
+
+     cpend = cp + n->stlen;
+     while (cp < cpend && isspace(*cp))
+         cp++;
+     if (cp == cpend || isalpha(*cp))
+         return 0.0;
+
+     if (n->flags & MAYBE_NUM) {
+         newflags = NUMBER;
+         n->flags &= ~MAYBE_NUM;
+     } else
+         newflags = 0;
+
+     /* a single remaining character is a number only if it is a digit */
+     if (cpend - cp == 1) {
+         if (ISDIGIT(*cp)) {
+             n->numbr = (AWKNUM)(*cp - '0');
+             n->flags |= newflags;
+         }
+         return n->numbr;
+     }
+
+ #ifdef NONDECDATA
+     errno = 0;
+     if (! do_traditional && isnondecimal(cp)) {
+         n->numbr = nondec2awknum(cp, cpend - cp);
+         /*
+          * strtod() is skipped on this path, so ptr must be given a
+          * value here: the test at "finish" reads it.  Treat the
+          * whole string as consumed.
+          */
+         ptr = cpend;
+         goto finish;
+     }
+ #endif /* NONDECDATA */
+
+     errno = 0;
+     save = *cpend;
+     *cpend = '\0';
+     n->numbr = (AWKNUM) strtod((const char *) cp, &ptr);
+
+     /* POSIX says trailing space is OK for NUMBER */
+     while (ISSPACE(*ptr))
+         ptr++;
+     *cpend = save;
+ finish:
+     /* the >= should be ==, but for SunOS 3.5 strtod() */
+     if (errno == 0 && ptr >= cpend)
+         n->flags |= newflags;
+     else
+         errno = 0;
+
+     return n->numbr;
+ }
+
+/*
+ * the following lookup table is used as an optimization in force_string
+ * (more complicated) variations on this theme didn't seem to pay off, but
+ * systematic testing might be in order at some point
+ */
+static const char *values[] = {
+ "0",
+ "1",
+ "2",
+ "3",
+ "4",
+ "5",
+ "6",
+ "7",
+ "8",
+ "9",
+};
+#define NVAL (sizeof(values)/sizeof(values[0]))
+
+ /*
+  * format_val --- format a numeric value based on format
+  *
+  * Produces the string form of s->numbr and stores it in s (stptr,
+  * stlen, stfmt), sets stref to 1, sets STR in s->flags and returns s.
+  * Integral values that fit in a long are written with "%ld" (or taken
+  * from values[] for 0..9) and get stfmt -1; all other values are
+  * written with the caller's format and get stfmt = index.
+  *
+  * NOTE(review): without GFMT_WORKAROUND the non-integral case does
+  * sprintf() into the 128-byte local buffer with a caller-supplied
+  * format; nothing visible here limits the length of that output.
+  * Confirm whether callers restrict the format.
+  */
+
+ NODE *
+ format_val(format, index, s)
+ char *format;
+ int index;
+ register NODE *s;
+ {
+ char buf[128];
+ register char *sp = buf;
+ double val;
+
+ /* not an integral value, or out of range */
+ if ((val = double_to_int(s->numbr)) != s->numbr
+ || val < LONG_MIN || val > LONG_MAX) {
+ #ifdef GFMT_WORKAROUND
+ NODE *dummy, *r;
+ unsigned short oflags;
+ extern NODE *format_tree P((const char *, int, NODE *));
+ extern NODE **fmt_list; /* declared in eval.c */
+
+ /* create dummy node for a sole use of format_tree */
+ getnode(dummy);
+ dummy->lnode = s;
+ dummy->rnode = NULL;
+ oflags = s->flags;
+ s->flags |= PERM; /* prevent from freeing by format_tree() */
+ r = format_tree(format, fmt_list[index]->stlen, dummy);
+ s->flags = oflags;
+ s->stfmt = (char) index;
+ s->stlen = r->stlen;
+ s->stptr = r->stptr;
+ freenode(r); /* Do not free_temp(r)! We want */
+ freenode(dummy); /* to keep s->stptr == r->stpr. */
+
+ goto no_malloc; /* s->stptr already owns the formatted text */
+ #else
+ /*
+ * no need for a "replacement" formatting by gawk,
+ * just use sprintf
+ */
+ sprintf(sp, format, s->numbr);
+ s->stlen = strlen(sp);
+ s->stfmt = (char) index;
+ #endif /* GFMT_WORKAROUND */
+ } else {
+ /* integral value */
+ /* force conversion to long only once */
+ register long num = (long) val;
+ if (num < NVAL && num >= 0) {
+ sp = (char *) values[num]; /* single digit: use the prebuilt string */
+ s->stlen = 1;
+ } else {
+ (void) sprintf(sp, "%ld", num);
+ s->stlen = strlen(sp);
+ }
+ s->stfmt = -1;
+ }
+ emalloc(s->stptr, char *, s->stlen + 2, "force_string");
+ memcpy(s->stptr, sp, s->stlen+1); /* the +1 copies the terminating '\0' too */
+ #ifdef GFMT_WORKAROUND
+ no_malloc:
+ #endif /* GFMT_WORKAROUND */
+ s->stref = 1;
+ s->flags |= STR;
+ return s;
+ }
+
+ /*
+  * r_force_string --- force a value to be a string
+  *
+  * Formats s with CONVFMT through format_val() and returns the result.
+  * In DEBUG builds the node is sanity-checked first, and a node that
+  * already has a string form made with "%ld"-style conversion
+  * (stfmt == -1) or with the current CONVFMT is returned unchanged.
+  */
+
+ NODE *
+ r_force_string(s)
+ register NODE *s;
+ {
+ #ifdef DEBUG
+ if (s == NULL)
+ cant_happen();
+ if (s->type != Node_val)
+ cant_happen();
+ if ((s->flags & NUM) == 0)
+ cant_happen();
+ if (s->stref <= 0)
+ cant_happen();
+ if ((s->flags & STR) != 0
+ && (s->stfmt == -1 || s->stfmt == CONVFMTidx))
+ return s;
+ #endif
+
+ return format_val(CONVFMT, CONVFMTidx, s);
+ }
+
+/*
+ * dupnode:
+ * Duplicate a node. (For strings, "duplicate" means crank up the
+ * reference count.)
+ */
+
+NODE *
+dupnode(n)
+NODE *n;
+{
+ register NODE *r;
+
+ if ((n->flags & TEMP) != 0) {
+ n->flags &= ~TEMP;
+ n->flags |= MALLOC;
+ return n;
+ }
+ if ((n->flags & (MALLOC|STR)) == (MALLOC|STR)) {
+ if (n->stref < LONG_MAX)
+ n->stref++;
+ return n;
+ }
+ getnode(r);
+ *r = *n;
+ r->flags &= ~(PERM|TEMP);
+ r->flags |= MALLOC;
+ if (n->type == Node_val && (n->flags & STR) != 0) {
+ r->stref = 1;
+ emalloc(r->stptr, char *, r->stlen + 2, "dupnode");
+ memcpy(r->stptr, n->stptr, r->stlen);
+ r->stptr[r->stlen] = '\0';
+ }
+ return r;
+}
+
+ /*
+  * mk_number --- get a new value node holding the number x; the node's
+  * flags are the caller's flags with SCALAR added.
+  */
+
+ NODE *
+ mk_number(x, flags)
+ AWKNUM x;
+ unsigned int flags;
+ {
+     register NODE *r;
+
+     getnode(r);
+     r->flags = flags | SCALAR;
+     r->numbr = x;
+     r->type = Node_val;
+ #ifdef DEBUG
+     /* debugging builds also give the string fields defined values */
+     r->stlen = 0;
+     r->stptr = NULL;
+     r->stref = 1;
+ #endif
+     return r;
+ }
+
+ /*
+  * make_str_node --- make a string node
+  *
+  * s      the string data, len bytes long
+  * len    number of bytes of s to use
+  * flags  ALREADY_MALLOCED: s is adopted as the node's buffer instead of
+  *        being copied; the byte s[len] is written, so the buffer must
+  *        have room for it.
+  *        SCAN: backslash escape sequences are expanded in place and
+  *        the buffer is shrunk to the resulting length.
+  *
+  * Returns a new Node_val node with stref 1 and stfmt -1.
+  */
+
+ NODE *
+ make_str_node(s, len, flags)
+ char *s;
+ size_t len;
+ int flags;
+ {
+     register NODE *r;
+
+     getnode(r);
+     r->type = Node_val;
+     r->flags = (STRING|STR|MALLOC|SCALAR);
+     if (flags & ALREADY_MALLOCED)
+         r->stptr = s;
+     else {
+         /*
+          * The last argument labels the out-of-memory report; pass
+          * the routine name like every other call site, not the
+          * string data.
+          */
+         emalloc(r->stptr, char *, len + 2, "make_str_node");
+         memcpy(r->stptr, s, len);
+     }
+     r->stptr[len] = '\0';
+
+     if ((flags & SCAN) != 0) {  /* scan for escape sequences */
+         char *pf;               /* read position */
+         register char *ptm;     /* write position, never ahead of pf */
+         register int c;
+         register char *end;
+
+         end = &(r->stptr[len]);
+         for (pf = ptm = r->stptr; pf < end;) {
+             c = *pf++;
+             if (c == '\\') {
+                 c = parse_escape(&pf);
+                 if (c < 0) {
+                     /* any negative result is kept as a literal backslash */
+                     if (do_lint)
+                         warning("backslash at end of string");
+                     c = '\\';
+                 }
+             }
+             *ptm++ = c;
+         }
+         len = ptm - r->stptr;
+         erealloc(r->stptr, char *, len + 1, "make_str_node");
+         r->stptr[len] = '\0';
+         r->flags |= PERM;
+     }
+     r->stlen = len;
+     r->stref = 1;
+     r->stfmt = -1;
+
+     return r;
+ }
+
+ /* tmp_string --- build a string node from s/len and flag it TEMP */
+
+ NODE *
+ tmp_string(s, len)
+ char *s;
+ size_t len;
+ {
+     register NODE *tmp = make_string(s, len);
+
+     tmp->flags |= TEMP;
+     return tmp;
+ }
+
+ /*
+  * more_nodes --- refill the node free list
+  *
+  * Allocates NODECHUNK nodes in one piece, chains them through nextp,
+  * returns the first one to the caller and leaves the remaining ones
+  * on the free list headed by nextfree.
+  */
+
+ #define NODECHUNK 100
+
+ NODE *nextfree = NULL;
+
+ NODE *
+ more_nodes()
+ {
+     register NODE *chunk;
+     register int i;
+
+     /* get more nodes and initialize list */
+     emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode");
+     chunk = nextfree;
+     for (i = 0; i < NODECHUNK; i++) {
+         chunk[i].flags = 0;
+         chunk[i].nextp = &chunk[i + 1];
+     }
+     chunk[NODECHUNK - 1].nextp = NULL;  /* last node ends the list */
+
+     /* the first node goes to the caller, the rest stay free */
+     nextfree = chunk->nextp;
+     return chunk;
+ }
+
+ #ifdef DEBUG
+ /*
+  * freenode --- give a node back
+  *
+  * SCALAR is cleared first.  Normally the node is pushed onto the front
+  * of the free list; an MPROF build zeroes stref and releases the node
+  * with free() instead.
+  */
+
+ void
+ freenode(it)
+ NODE *it;
+ {
+     it->flags &= ~SCALAR;
+ #ifndef MPROF
+     /* add it to head of freelist */
+     it->nextp = nextfree;
+     nextfree = it;
+ #else /* MPROF */
+     it->stref = 0;
+     free((char *) it);
+ #endif /* MPROF */
+ }
+ #endif /* DEBUG */
+
+ /*
+  * unref --- drop one reference to a node
+  *
+  * NULL and PERM nodes are left alone.  For MALLOC or TEMP nodes a
+  * string with more than one reference only has its count lowered (a
+  * count of LONG_MAX is never lowered); otherwise the string data, if
+  * any, and the node itself are released.  FIELD nodes are released
+  * without touching their string data.
+  */
+
+ void
+ unref(tmp)
+ register NODE *tmp;
+ {
+     if (tmp == NULL || (tmp->flags & PERM) != 0)
+         return;
+
+     if ((tmp->flags & (MALLOC|TEMP)) == 0) {
+         /* not heap managed: only field nodes get recycled */
+         if ((tmp->flags & FIELD) != 0)
+             freenode(tmp);
+         return;
+     }
+
+     tmp->flags &= ~TEMP;
+     if ((tmp->flags & STR) != 0) {
+         if (tmp->stref > 1) {
+             /* still shared by somebody else */
+             if (tmp->stref != LONG_MAX)
+                 tmp->stref--;
+             return;
+         }
+         free(tmp->stptr);
+     }
+     freenode(tmp);
+ }
+
+/*
+ * parse_escape:
+ *
+ * Parse a C escape sequence. STRING_PTR points to a variable containing a
+ * pointer to the string to parse. That pointer is updated past the
+ * characters we use. The value of the escape sequence is returned.
+ *
+ * A negative value means the sequence \ newline was seen, which is supposed to
+ * be equivalent to nothing at all.
+ *
+ * If \ is followed by a null character, we return a negative value and leave
+ * the string pointer pointing at the null character.
+ *
+ * If \ is followed by 000, we return 0 and leave the string pointer after the
+ * zeros. A value of 0 does not mean end of string.
+ *
+ * Posix doesn't allow \x.
+ */
+
+int
+parse_escape(string_ptr)
+char **string_ptr;
+{
+ register int c = *(*string_ptr)++;
+ register int i;
+ register int count;
+
+ switch (c) {
+ case 'a':
+ return BELL;
+ case 'b':
+ return '\b';
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+ case '\n':
+ return -2;
+ case 0:
+ (*string_ptr)--;
+ return -1;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ i = c - '0';
+ count = 0;
+ while (++count < 3) {
+ if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
+ i *= 8;
+ i += c - '0';
+ } else {
+ (*string_ptr)--;
+ break;
+ }
+ }
+ return i;
+ case 'x':
+ if (do_lint) {
+ static int didwarn = FALSE;
+
+ if (! didwarn) {
+ didwarn = TRUE;
+ warning("POSIX does not allow \"\\x\" escapes");
+ }
+ }
+ if (do_posix)
+ return ('x');
+ if (! isxdigit((*string_ptr)[0])) {
+ warning("no hex digits in \\x escape sequence");
+ return ('x');
+ }
+ i = 0;
+ for (;;) {
+ if (ISXDIGIT((c = *(*string_ptr)++))) {
+ i *= 16;
+ if (ISDIGIT(c))
+ i += c - '0';
+ else if (ISUPPER(c))
+ i += c - 'A' + 10;
+ else
+ i += c - 'a' + 10;
+ } else {
+ (*string_ptr)--;
+ break;
+ }
+ }
+ return i;
+ default:
+ return c;
+ }
+}
diff --git a/contrib/awk/patchlevel.h b/contrib/awk/patchlevel.h
new file mode 100644
index 0000000..f360824
--- /dev/null
+++ b/contrib/awk/patchlevel.h
@@ -0,0 +1 @@
+#define PATCHLEVEL 3
diff --git a/contrib/awk/posix/ChangeLog b/contrib/awk/posix/ChangeLog
new file mode 100644
index 0000000..575baa1
--- /dev/null
+++ b/contrib/awk/posix/ChangeLog
@@ -0,0 +1,19 @@
+Thu May 15 12:49:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.3: Release tar file made.
+
+Fri Apr 18 07:55:47 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * BETA Release 3.0.34: Release tar file made.
+
+Wed Dec 25 11:25:22 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.2: Release tar file made.
+
+Tue Dec 10 23:09:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.1: Release tar file made.
+
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/contrib/awk/posix/gawkmisc.c b/contrib/awk/posix/gawkmisc.c
new file mode 100644
index 0000000..68bfb5d
--- /dev/null
+++ b/contrib/awk/posix/gawkmisc.c
@@ -0,0 +1,108 @@
+ /* gawkmisc.c --- miscellaneous gawk routines that are OS specific.
+
+ Copyright (C) 1986, 1988, 1989, 1991 - 96 the Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+ char quote = '\''; /* character usage() prints around "program" in its synopsis */
+ char *defpath = DEFPATH; /* value load_environ() stores in ENVIRON["AWKPATH"] when AWKPATH is unset */
+ char envsep = ':'; /* presumably the separator between path-list entries -- TODO confirm against users */
+
+ /*
+  * gawk_name --- return the part of filespec after its last '/', or
+  * filespec itself when it contains no '/'.
+  */
+
+ char *
+ gawk_name(filespec)
+ const char *filespec;
+ {
+     char *slash = strrchr(filespec, '/');
+
+     /* "path/name" -> "name" */
+     if (slash != NULL)
+         return slash + 1;
+     return (char *) filespec;
+ }
+
+ /* os_arg_fixup --- command-line fixup hook; this version changes nothing */
+
+ void
+ os_arg_fixup(argcp, argvp)
+ int *argcp;
+ char ***argvp;
+ {
+     /* intentionally empty */
+ }
+
+ /*
+  * os_devopen --- hook for opening special per-OS devices; this
+  * version opens nothing and always reports INVALID_HANDLE.
+  */
+
+ int
+ os_devopen(name, flag)
+ const char *name;
+ int flag;
+ {
+     int handle = INVALID_HANDLE;    /* no special devices here */
+
+     return handle;
+ }
+
+ /*
+  * optimal_bufsize --- determine optimal buffer size
+  *
+  * fd   descriptor that is going to be read
+  * stb  caller's stat buffer; it is zeroed first and, unless fd is a
+  *      terminal, filled in by fstat()
+  *
+  * Returns BUFSIZ for a terminal, the file size for a non-empty file
+  * smaller than the default block size, and the default block size
+  * otherwise.  A failing fstat() is fatal.
+  */
+
+ int
+ optimal_bufsize(fd, stb)
+ int fd;
+ struct stat *stb;
+ {
+ /* force all members to zero in case OS doesn't use all of them. */
+ memset(stb, '\0', sizeof(struct stat));
+
+ /*
+ * System V.n, n < 4, doesn't have the file system block size in the
+ * stat structure. So we have to make some sort of reasonable
+ * guess. We use stdio's BUFSIZ, since that is what it was
+ * meant for in the first place.
+ */
+ #ifdef HAVE_ST_BLKSIZE
+ #define DEFBLKSIZE (stb->st_blksize ? stb->st_blksize : BUFSIZ)
+ #else
+ #define DEFBLKSIZE BUFSIZ
+ #endif
+
+ if (isatty(fd))
+ return BUFSIZ;
+ if (fstat(fd, stb) == -1)
+ fatal("can't stat fd %d (%s)", fd, strerror(errno));
+ /* NOTE(review): the seek uses whence 0 and offset 0, so on success it also moves fd to offset 0 -- confirm callers expect that */
+ if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */
+ return DEFBLKSIZE;
+ if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE) /* small file */
+ return stb->st_size;
+ return DEFBLKSIZE;
+ }
+
+ /* ispath --- nonzero when file contains a '/', i.e. names a directory part */
+
+ int
+ ispath(file)
+ const char *file;
+ {
+     if (strchr(file, '/') == NULL)
+         return 0;
+     return 1;
+ }
+
+ /* isdirpunct --- nonzero when c is the directory separator '/' */
+
+ int
+ isdirpunct(c)
+ int c;
+ {
+     return c == '/' ? 1 : 0;
+ }
+
diff --git a/contrib/awk/random.c b/contrib/awk/random.c
new file mode 100644
index 0000000..002b226
--- /dev/null
+++ b/contrib/awk/random.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 1983 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley. The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)random.c 5.5 (Berkeley) 7/6/88";
+#endif /* LIBC_SCCS and not lint */
+
+#include <stdio.h>
+#include "random.h" /* GAWK ADDITION */
+
+/*
+ * random.c:
+ * An improved random number generation package. In addition to the standard
+ * rand()/srand() like interface, this package also has a special state info
+ * interface. The initstate() routine is called with a seed, an array of
+ * bytes, and a count of how many bytes are being passed in; this array is then
+ * initialized to contain information for random number generation with that
+ * much state information. Good sizes for the amount of state information are
+ * 32, 64, 128, and 256 bytes. The state can be switched by calling the
+ * setstate() routine with the same array as was initialized with initstate().
+ * By default, the package runs with 128 bytes of state information and
+ * generates far better random numbers than a linear congruential generator.
+ * If the amount of state information is less than 32 bytes, a simple linear
+ * congruential R.N.G. is used.
+ * Internally, the state information is treated as an array of longs; the
+ * zeroeth element of the array is the type of R.N.G. being used (small
+ * integer); the remainder of the array is the state information for the
+ * R.N.G. Thus, 32 bytes of state information will give 7 longs worth of
+ * state information, which will allow a degree seven polynomial. (Note: the
+ * zeroeth word of state information also has some other information stored
+ * in it -- see setstate() for details).
+ * The random number generation technique is a linear feedback shift register
+ * approach, employing trinomials (since there are fewer terms to sum up that
+ * way). In this approach, the least significant bit of all the numbers in
+ * the state table will act as a linear feedback shift register, and will have
+ * period 2^deg - 1 (where deg is the degree of the polynomial being used,
+ * assuming that the polynomial is irreducible and primitive). The higher
+ * order bits will have longer periods, since their values are also influenced
+ * by pseudo-random carries out of the lower bits. The total period of the
+ * generator is approximately deg*(2**deg - 1); thus doubling the amount of
+ * state information has a vast influence on the period of the generator.
+ * Note: the deg*(2**deg - 1) is an approximation only good for large deg,
+ * when the period of the shift register is the dominant factor. With deg
+ * equal to seven, the period is actually much longer than the 7*(2**7 - 1)
+ * predicted by this formula.
+ */
+
+
+
+/*
+ * For each of the currently supported random number generators, we have a
+ * break value on the amount of state information (you need at least this
+ * many bytes of state info to support this random number generator), a degree
+ * for the polynomial (actually a trinomial) that the R.N.G. is based on, and
+ * the separation between the two lower order coefficients of the trinomial.
+ */
+
+#define TYPE_0 0 /* linear congruential */
+#define BREAK_0 8
+#define DEG_0 0
+#define SEP_0 0
+
+#define TYPE_1 1 /* x**7 + x**3 + 1 */
+#define BREAK_1 32
+#define DEG_1 7
+#define SEP_1 3
+
+#define TYPE_2 2 /* x**15 + x + 1 */
+#define BREAK_2 64
+#define DEG_2 15
+#define SEP_2 1
+
+#define TYPE_3 3 /* x**31 + x**3 + 1 */
+#define BREAK_3 128
+#define DEG_3 31
+#define SEP_3 3
+#ifdef _CRAY
+#define DEG_3_P1 32 /* bug - do addition here */
+#define SEP_3_P1 4 /* *_3 + 1 = _3_P1 */
+#endif
+
+#define TYPE_4 4 /* x**63 + x + 1 */
+#define BREAK_4 256
+#define DEG_4 63
+#define SEP_4 1
+
+
+/*
+ * Array versions of the above information to make code run faster -- relies
+ * on fact that TYPE_i == i.
+ */
+
+#define MAX_TYPES 5 /* max number of types above */
+
+static int degrees[ MAX_TYPES ] = { DEG_0, DEG_1, DEG_2,
+ DEG_3, DEG_4 };
+
+static int seps[ MAX_TYPES ] = { SEP_0, SEP_1, SEP_2,
+ SEP_3, SEP_4 };
+
+
+
+/*
+ * Initially, everything is set up as if from :
+ * initstate( 1, &randtbl, 128 );
+ * Note that this initialization takes advantage of the fact that srandom()
+ * advances the front and rear pointers 10*rand_deg times, and hence the
+ * rear pointer which starts at 0 will also end up at zero; thus the zeroeth
+ * element of the state information, which contains info about the current
+ * position of the rear pointer is just
+ * MAX_TYPES*(rptr - state) + TYPE_3 == TYPE_3.
+ */
+
+static long randtbl[ DEG_3 + 1 ] = { TYPE_3,
+ 0x9a319039, 0x32d9c024, 0x9b663182, 0x5da1f342,
+ 0xde3b81e0, 0xdf0a6fb5, 0xf103bc02, 0x48f340fb,
+ 0x7449e56b, 0xbeb1dbb0, 0xab5c5918, 0x946554fd,
+ 0x8c2e680f, 0xeb3d799f, 0xb11ee0b7, 0x2d436b86,
+ 0xda672e2a, 0x1588ca88, 0xe369735d, 0x904f35f7,
+ 0xd7158fd6, 0x6fa6f051, 0x616e6b96, 0xac94efdc,
+ 0x36413f93, 0xc622c298, 0xf5a42ab8, 0x8a88d77b,
+ 0xf5ad9d0e, 0x8999220b, 0x27fb47b9 };
+
+/*
+ * fptr and rptr are two pointers into the state info, a front and a rear
+ * pointer. These two pointers are always rand_sep places apart, as they cycle
+ * cyclically through the state information. (Yes, this does mean we could get
+ * away with just one pointer, but the code for random() is more efficient this
+ * way). The pointers are left positioned as they would be from the call
+ * initstate( 1, randtbl, 128 )
+ * (The position of the rear pointer, rptr, is really 0 (as explained above
+ * in the initialization of randtbl) because the state table pointer is set
+ * to point to randtbl[1] (as explained below).
+ */
+
+#ifdef _CRAY
+static long *fptr = &randtbl[ SEP_3_P1 ];
+#else
+static long *fptr = &randtbl[ SEP_3 + 1 ];
+#endif
+static long *rptr = &randtbl[ 1 ];
+
+
+
+/*
+ * The following things are the pointer to the state information table,
+ * the type of the current generator, the degree of the current polynomial
+ * being used, and the separation between the two pointers.
+ * Note that for efficiency of random(), we remember the first location of
+ * the state information, not the zeroeth. Hence it is valid to access
+ * state[-1], which is used to store the type of the R.N.G.
+ * Also, we remember the last location, since this is more efficient than
+ * indexing every time to find the address of the last element to see if
+ * the front and rear pointers have wrapped.
+ */
+
+static long *state = &randtbl[ 1 ];
+
+static int rand_type = TYPE_3;
+static int rand_deg = DEG_3;
+static int rand_sep = SEP_3;
+
+#ifdef _CRAY
+static long *end_ptr = &randtbl[ DEG_3_P1 ];
+#else
+static long *end_ptr = &randtbl[ DEG_3 + 1 ];
+#endif
+
+
+
+/*
+ * srandom:
+ * Initialize the random number generator based on the given seed. If the
+ * type is the trivial no-state-information type, just remember the seed.
+ * Otherwise, initializes state[] based on the given "seed" via a linear
+ * congruential generator. Then, the pointers are set to known locations
+ * that are exactly rand_sep places apart. Lastly, it cycles the state
+ * information a given number of times to get rid of any initial dependencies
+ * introduced by the L.C.R.N.G.
+ * Note that the initialization of randtbl[] for default usage relies on
+ * values produced by this routine.
+ */
+
+void
+srandom( x )
+
+	unsigned x;
+{
+	register int i;
+	long random();
+
+	state[ 0 ] = x;		/* every generator type keeps the seed in state[ 0 ] */
+	if( rand_type == TYPE_0 ) return;	/* trivial type: nothing else to set up */
+
+	/*
+	 * Fill the rest of the table with the L.C.R.N.G.  The step is meant to
+	 * wrap, so do it in unsigned long; signed overflow would be undefined.
+	 */
+	for( i = 1; i < rand_deg; i++ ) {
+		state[i] = (long)( 1103515245UL*(unsigned long)state[i - 1] + 12345UL );
+	}
+	fptr = &state[ rand_sep ];	/* front starts rand_sep places ahead of rear */
+	rptr = &state[ 0 ];
+	for( i = 0; i < 10*rand_deg; i++ ) random();	/* cycle away the L.C.R.N.G. dependencies */
+}
+
+
+
+/*
+ * initstate:
+ * Initialize the state information in the given array of n bytes for
+ * future random number generation. Based on the number of bytes we
+ * are given, and the break values for the different R.N.G.'s, we choose
+ * the best (largest) one we can and set things up for it. srandom() is
+ * then called to initialize the state information.
+ * Note that on return from srandom(), we set state[-1] to be the type
+ * multiplexed with the current value of the rear pointer; this is so
+ * successive calls to initstate() won't lose this information and will
+ * be able to restart with setstate().
+ * Note: the first thing we do is save the current state, if any, just like
+ * setstate() so that it doesn't matter when initstate is called.
+ * Returns a pointer to the old state.
+ */
+
+char *
+initstate( seed, arg_state, n )
+
+	unsigned seed;			/* seed for R. N. G. */
+	char *arg_state;		/* pointer to state array */
+	int n;				/* # bytes of state info */
+{
+	register char *ostate = (char *)( &state[ -1 ] );
+	long nlongs;			/* whole longs that fit in the n bytes */
+
+	/*
+	 * Before leaving the current state, record its type and rear pointer
+	 * position in its zeroeth word (the same encoding setstate() decodes).
+	 */
+	if( rand_type == TYPE_0 ) state[ -1 ] = rand_type;
+	else state[ -1 ] = MAX_TYPES*(rptr - state) + rand_type;
+
+	/*
+	 * Each BREAK_i is four times the number of table words its generator
+	 * needs, so it is a byte count only where a long is 4 bytes.  The table
+	 * is an array of longs; sizing the choice in longs selects exactly as
+	 * the byte comparison does when longs are 4 bytes, and keeps a wider
+	 * long from selecting a table that runs past the end of arg_state.
+	 */
+	nlongs = ( n < 0 ) ? 0 : (long)( n/sizeof( long ) );
+	if( nlongs < BREAK_0/4 ) {
+		fprintf( stderr, "initstate: not enough state (%d bytes) with which to do jack; ignored.\n", n );
+		return 0;
+	}
+
+	/* choose the best (largest) generator that fits */
+	if( nlongs < BREAK_1/4 ) rand_type = TYPE_0;
+	else if( nlongs < BREAK_2/4 ) rand_type = TYPE_1;
+	else if( nlongs < BREAK_3/4 ) rand_type = TYPE_2;
+	else if( nlongs < BREAK_4/4 ) rand_type = TYPE_3;
+	else rand_type = TYPE_4;
+	rand_deg = degrees[ rand_type ];	/* tables are indexed by type: TYPE_i == i */
+	rand_sep = seps[ rand_type ];
+
+	state = &( ( (long *)arg_state )[1] );	/* first location */
+	end_ptr = &state[ rand_deg ];	/* must set end_ptr before srandom */
+	srandom( seed );
+
+	/*
+	 * Store the type and rear pointer position in the new state as well, so
+	 * a later setstate() on this array can restart it.
+	 */
+	if( rand_type == TYPE_0 ) state[ -1 ] = rand_type;
+	else state[ -1 ] = MAX_TYPES*(rptr - state) + rand_type;
+
+	/* hand back the state array that was in use on entry */
+	return( ostate );
+}
+
+
+
+/*
+ * setstate:
+ * Restore the state from the given state array.
+ * Note: it is important that we also remember the locations of the pointers
+ * in the current state information, and restore the locations of the pointers
+ * from the old state information. This is done by multiplexing the pointer
+ * location into the zeroeth word of the state information.
+ * Note that due to the order in which things are done, it is OK to call
+ * setstate() with the same state as the current state.
+ * Returns a pointer to the old state information.
+ */
+
+char *
+setstate( arg_state )
+
+	char *arg_state;
+{
+	register long *new_state = (long *)arg_state;
+	register int type = new_state[0]%MAX_TYPES;	/* zeroeth word holds MAX_TYPES*rear + type */
+	register int rear = new_state[0]/MAX_TYPES;
+	char *ostate = (char *)( &state[ -1 ] );
+
+	if( rand_type == TYPE_0 ) state[ -1 ] = rand_type;	/* save position in the state being left */
+	else state[ -1 ] = MAX_TYPES*(rptr - state) + rand_type;
+	switch( type ) {
+	    case TYPE_0:
+	    case TYPE_1:
+	    case TYPE_2:
+	    case TYPE_3:
+	    case TYPE_4:
+		rand_type = type;
+		rand_deg = degrees[ type ];
+		rand_sep = seps[ type ];
+		break;
+	    default:	/* the message promises "not changed", so leave the current generator in place */
+		fprintf( stderr, "setstate: state info has been munged; not changed.\n" );
+		return 0;	/* null pointer on failure, as initstate() does */
+	}
+	state = &new_state[ 1 ];
+	if( rand_type != TYPE_0 ) {
+		rptr = &state[ rear ];
+		fptr = &state[ (rear + rand_sep)%rand_deg ];
+	}
+	end_ptr = &state[ rand_deg ];		/* set end_ptr too */
+	return( ostate );
+}
+
+
+
+/*
+ * random:
+ * If we are using the trivial TYPE_0 R.N.G., just do the old linear
+ * congruential bit. Otherwise, we do our fancy trinomial stuff, which is the
+ * same in all the other cases due to all the global variables that have been
+ * set up. The basic operation is to add the number at the rear pointer into
+ * the one at the front pointer. Then both pointers are advanced to the next
+ * location cyclically in the table. The value returned is the sum generated,
+ * reduced to 31 bits by throwing away the "least random" low bit.
+ * Note: the code takes advantage of the fact that both the front and
+ * rear pointers can't wrap on the same call by not testing the rear
+ * pointer if the front one has wrapped.
+ * Returns a 31-bit random number.
+ */
+
+long
+random()
+{
+	long i;
+	/* sums are formed in unsigned long: they are meant to wrap, and signed overflow is undefined */
+	if( rand_type == TYPE_0 ) {
+		i = state[0] = (long)( ( (unsigned long)state[0]*1103515245UL + 12345UL )&0x7fffffffUL );
+	}
+	else {
+		*fptr = (long)( (unsigned long)*fptr + (unsigned long)*rptr );
+		i = (long)( ( (unsigned long)*fptr >> 1 )&0x7fffffffUL );	/* chucking least random bit */
+		if( ++fptr >= end_ptr ) {
+			fptr = state;
+			++rptr;		/* cannot wrap on the same call as fptr, so no test */
+		}
+		else {
+			if( ++rptr >= end_ptr ) rptr = state;
+		}
+	}
+	return( i );
+}
diff --git a/contrib/awk/random.h b/contrib/awk/random.h
new file mode 100644
index 0000000..7fd0ff9
--- /dev/null
+++ b/contrib/awk/random.h
@@ -0,0 +1,29 @@
+/*
+ * random.h - redefine name of random lib routines to avoid conflicts
+ */
+
+/*
+ * Copyright (C) 1996 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#define initstate gawk_initstate
+#define setstate gawk_setstate
+#define random gawk_random
+#define srandom gawk_srandom
diff --git a/contrib/awk/re.c b/contrib/awk/re.c
new file mode 100644
index 0000000..995fbb9
--- /dev/null
+++ b/contrib/awk/re.c
@@ -0,0 +1,310 @@
+/*
+ * re.c - compile regular expressions.
+ */
+
+/*
+ * Copyright (C) 1991-1996 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+
+static reg_syntax_t syn;
+
+/* make_regexp --- compile the len bytes at s into a newly allocated Regexp; fatal() if the pattern is rejected */
+
+Regexp *
+make_regexp(s, len, ignorecase, dfa)
+char *s;
+size_t len;
+int ignorecase;
+int dfa;
+{
+ Regexp *rp;
+ const char *rerr;
+ char *src = s;
+ char *temp;
+ char *end = s + len;
+ register char *dest;
+ register int c, c2;
+
+ /* Handle escaped characters first. */
+
+ /*
+ * Build a copy of the string (in dest) with the
+ * escaped characters translated, and generate the regex
+ * from that.
+ */
+ emalloc(dest, char *, len + 2, "make_regexp");
+ temp = dest; /* temp keeps the start of the copy; dest advances as it is filled */
+
+ while (src < end) {
+ if (*src == '\\') {
+ c = *++src; /* c is the character following the backslash */
+ switch (c) {
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v':
+ case 'x':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ c2 = parse_escape(&src); /* NOTE(review): parse_escape() is defined elsewhere; it evidently advances src past the escape -- confirm */
+ if (c2 < 0)
+ cant_happen();
+ /*
+ * Unix awk treats octal (and hex?) chars
+ * literally in re's, so escape regexp
+ * metacharacters.
+ */
+ if (do_traditional && ! do_posix && (isdigit(c) || c == 'x')
+ && strchr("()|*+?.^$\\[]", c2) != NULL)
+ *dest++ = '\\';
+ *dest++ = (char) c2;
+ break;
+ case '8':
+ case '9': /* a\9b not valid */
+ *dest++ = c; /* the digit is copied without its backslash */
+ src++;
+ break;
+ case 'y': /* normally \b */
+ /* gnu regex op */
+ if (! do_traditional) {
+ *dest++ = '\\';
+ *dest++ = 'b';
+ src++;
+ break;
+ }
+ /* else, fall through */
+ default: /* any other escape is copied through unchanged, backslash included */
+ *dest++ = '\\';
+ *dest++ = (char) c;
+ src++;
+ break;
+ } /* switch */
+ } else
+ *dest++ = *src++; /* not '\\' */
+ } /* while */
+
+ *dest = '\0' ; /* Only necessary if we print dest ? */
+ emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
+ memset((char *) rp, 0, sizeof(*rp));
+ rp->pat.allocated = 0; /* regex will allocate the buffer */
+ emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
+
+ if (ignorecase)
+ rp->pat.translate = casetable;
+ else
+ rp->pat.translate = NULL;
+ len = dest - temp; /* from here on len is the length of the translated copy, not of s */
+ if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
+ fatal("%s: /%s/", rerr, temp);
+
+ /* gack. this must be done *after* re_compile_pattern */
+ rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */
+ if (dfa && ! ignorecase) { /* a dfa matcher is built only for case-sensitive regexps */
+ dfacomp(temp, len, &(rp->dfareg), TRUE);
+ rp->dfa = TRUE;
+ } else
+ rp->dfa = FALSE;
+
+ free(temp); /* the translated copy is not kept */
+ return rp;
+}
+
+/* research --- do a regexp search over len bytes of str from offset start; use dfa if possible. Returns re_search()'s result, 1 if the dfa match is enough, -1 if dfaexec() returns NULL */
+
+int
+research(rp, str, start, len, need_start)
+Regexp *rp;
+register char *str;
+int start;
+register size_t len;
+int need_start;
+{
+ char *ret = str; /* stays equal to str when no dfa search is run */
+ int try_backref; /* set by dfaexec(); only read below when rp->dfa is true */
+
+ /*
+ * Always do dfa search if can; if it fails, then even if
+ * need_start is true, we won't bother with the regex search.
+ */
+ if (rp->dfa) {
+ char save;
+ int count = 0;
+
+ /*
+ * dfa likes to stick a '\n' right after the matched
+ * text. So we just save and restore the character.
+ */
+ save = str[start+len];
+ ret = dfaexec(&(rp->dfareg), str+start, str+start+len, TRUE,
+ &count, &try_backref);
+ str[start+len] = save;
+ }
+ if (ret) {
+ if (need_start || rp->dfa == FALSE || try_backref) { /* the regex matcher has to run */
+ int result = re_search(&(rp->pat), str, start+len,
+ start, len, &(rp->regs));
+ /* recover any space from C based alloca */
+#ifdef C_ALLOCA
+ (void) alloca(0);
+#endif
+ return result;
+ } else
+ return 1; /* dfa matched and nothing more was asked for */
+ } else
+ return -1; /* dfaexec() returned NULL */
+}
+
+/* refree --- free up the dynamic memory used by a compiled regexp, including rp itself */
+
+void
+refree(rp)
+Regexp *rp;
+{
+ free(rp->pat.buffer);
+ free(rp->pat.fastmap);
+ if (rp->regs.start)
+ free(rp->regs.start);
+ if (rp->regs.end)
+ free(rp->regs.end);
+ if (rp->dfa) /* dfareg is released only when the dfa flag is set */
+ dfafree(&(rp->dfareg));
+ free(rp); /* rp must not be used after this call */
+}
+
+/* dfaerror --- print an error message for the dfa routines, by handing s to fatal() */
+
+void
+dfaerror(s)
+const char *s;
+{
+ fatal("%s", s); /* s is passed as an argument, never as the format string */
+}
+
+/* re_update --- recompile a dynamic regexp if needed and return the compiled form stored in t->re_reg */
+
+Regexp *
+re_update(t)
+NODE *t;
+{
+ NODE *t1;
+
+/* # define CASE 1 */
+ if ((t->re_flags & CASE) == IGNORECASE) { /* CASE bit matches IGNORECASE; it is set from IGNORECASE after each compile below */
+ if ((t->re_flags & CONST) != 0)
+ return t->re_reg; /* constant regexp: returned without re-evaluating re_exp */
+ t1 = force_string(tree_eval(t->re_exp));
+ if (t->re_text != NULL) {
+ if (cmp_nodes(t->re_text, t1) == 0) { /* cmp_nodes() reports no difference from the saved text */
+ free_temp(t1);
+ return t->re_reg;
+ }
+ unref(t->re_text);
+ }
+ t->re_text = dupnode(t1); /* keep the new text for the next comparison */
+ free_temp(t1);
+ }
+ if (t->re_reg != NULL)
+ refree(t->re_reg); /* the old compiled form is discarded before recompiling */
+ if (t->re_cnt > 0) /* re_cnt, once nonzero, counts up and is reset to 0 after passing 10 */
+ t->re_cnt++;
+ if (t->re_cnt > 10)
+ t->re_cnt = 0;
+ if (t->re_text == NULL) {
+ t1 = force_string(tree_eval(t->re_exp));
+ t->re_text = dupnode(t1);
+ free_temp(t1);
+ }
+ t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
+ IGNORECASE, t->re_cnt); /* re_cnt is passed as make_regexp()'s dfa argument */
+ t->re_flags &= ~CASE;
+ t->re_flags |= IGNORECASE;
+ return t->re_reg;
+}
+
+/* resetup --- pick the regexp syntax for this run and hand it to regex and dfa */
+
+void
+resetup()
+{
+	/*
+	 * do_posix gets strict POSIX re's, do_traditional gets traditional
+	 * Unix awk re's, and the default is POSIX re's plus the GNU ops.
+	 */
+	syn = do_posix ? RE_SYNTAX_POSIX_AWK
+		: do_traditional ? RE_SYNTAX_AWK : RE_SYNTAX_GNU_AWK;
+
+	/*
+	 * Interval expressions stay off unless asked for, since having
+	 * them on is likely to break too many old programs.
+	 */
+	if (do_intervals)
+		syn = syn | RE_INTERVALS;
+
+	(void) re_set_syntax(syn);
+	dfasyntax(syn, FALSE);
+}
+
+/* avoid_dfa --- FIXME: temporary kludge function until we have a new dfa.c */
+
+int
+avoid_dfa(re, str, len)
+NODE *re;
+char *str;
+size_t len;
+{
+	char *pattern;		/* text of the regexp */
+	int patlen;		/* its length */
+	int k = 0;
+	char *limit = str + len;
+
+	/* constant regexps keep their text in re_exp, the others in re_text */
+	if ((re->re_flags & CONST) == 0) {
+		pattern = re->re_text->stptr;
+		patlen = re->re_text->stlen;
+	} else {
+		pattern = re->re_exp->stptr;
+		patlen = re->re_exp->stlen;
+	}
+
+	/* step 1: skip ahead to the first '^' or '$' in the regexp text */
+	while (k < patlen && pattern[k] != '^' && pattern[k] != '$')
+		k++;
+	if (k >= patlen)
+		return FALSE;	/* no anchor character anywhere */
+
+	/* step 2: an anchor is present; TRUE only if the subject holds a newline */
+	while (str < limit) {
+		if (*str++ == '\n')
+			return TRUE;
+	}
+
+	return FALSE;
+}
diff --git a/contrib/awk/regex.h b/contrib/awk/regex.h
new file mode 100644
index 0000000..5140052
--- /dev/null
+++ b/contrib/awk/regex.h
@@ -0,0 +1,531 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+ Copyright (C) 1985,89,90,91,92,93,95,96,97 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+#define RE_TRANSLATE_TYPE char *
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ RE_TRANSLATE_TYPE translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#ifdef _REGEX_RE_COMP
+#ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+#endif
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/contrib/awk/test/ChangeLog b/contrib/awk/test/ChangeLog
new file mode 100644
index 0000000..8efafaa
--- /dev/null
+++ b/contrib/awk/test/ChangeLog
@@ -0,0 +1,252 @@
+Thu May 15 12:49:08 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.3: Release tar file made.
+
+Tue May 13 12:53:46 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (messages): more testing for OK failure on Linux.
+
+Sun May 11 14:57:11 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (nondec): new test case.
+ * nondec.awk, nondec.ok: new files.
+
+Sun May 11 07:07:05 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (prdupval): new test case.
+ * prdupval.awk, prdupval.in, prdupval.ok: new files.
+
+Wed May 7 21:54:34 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (delarprm): new test case.
+ * delarprm.awk, delarprm.ok: new files.
+
+Wed May 7 17:54:00 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (pid): several fixes from ghazi@caip.rutgers.edu.
+
+Tue May 6 20:28:30 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (strftime): Use the right locale stuff.
+ (clobber): don't need an input file.
+
+Thu Apr 24 22:24:42 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (pid): new test case, from jco@convex.com.
+ (specfile): removed test case, pid does it better.
+ * pid.awk, pid.ok, pid.sh: new files.
+ * specfile.awk: removed.
+
+Wed Apr 23 23:37:10 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (pipeio2): new test case.
+ * pipeio2.awk, pipeio2.ok, pipeio2.in: new files.
+
+Sun Apr 20 12:22:52 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (clobber): new test case.
+ * clobber.awk, clobber.ok: new files.
+
+Fri Apr 18 07:55:47 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * BETA Release 3.0.34: Release tar file made.
+
+Tue Apr 15 05:57:29 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (strftlng): More wizardry for bizarre Unix systems.
+ (nlfldsep): use program and input file, not shell script
+ (basic, unix-tests, gawk.extensions): moved specfile, pipeio1
+ and strftlng into unix-tests per Pat Rankin.
+ * nlfldsep.awk, nlfldsep.in: new files.
+ * nlfldsep.sh: removed.
+
+Wed Apr 9 23:32:47 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (funstack): new test case.
+ * funstack.awk, funstack.in, funstack.ok: new files.
+ * substr.awk: added many more tests.
+ * substr.ok: updated
+
+Wed Mar 19 20:10:21 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (pipeio1): new test case.
+ * pipeio1.awk, pipeio1.ok: new files.
+
+Tue Mar 18 06:38:36 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (noparm): new test case.
+ * noparm.awk, noparm.ok: new files.
+
+Fri Feb 21 06:30:18 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (reint): new test case.
+ * reint.awk, reint.in, reint.ok: new files.
+
+Wed Feb 5 18:17:51 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (fnarydel): new test case.
+ * fnarydel.awk, fnarydel.ok: new files.
+
+Sun Jan 19 17:06:18 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (nors): new test case.
+ * nors.ok: new file.
+
+Sun Jan 19 17:06:18 1997 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (specfile, strftlng, nfldstr): new test cases.
+ * specfile.awk, strftlng.awk, strftlng.ok, nfldstr.ok: new files.
+
+Fri Dec 27 11:27:13 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (intest): new test case.
+ * intest.awk, intest.ok: new files.
+
+Wed Dec 25 11:25:22 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.2: Release tar file made.
+
+Tue Dec 10 23:09:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Release 3.0.1: Release tar file made.
+
+Thu Nov 7 09:12:20 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (splitvar): new test case.
+ * splitvar.awk, splitvar.in, splitvar.ok: new files.
+
+Sun Nov 3 10:55:50 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (nlfldsep): new test case.
+ * nlfldsep.sh, nlfldsep.ok: new files.
+
+Fri Oct 25 10:29:56 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * rand.awk: call srand with fixed seed.
+ * rand.ok: new file.
+ * Makefile.in (rand): changed to compare output with rand.ok.
+
+Sat Oct 19 21:52:04 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (tradanch): new test case.
+ * tradanch.awk, tradanch.in, tradanch.ok: new files.
+
+Thu Oct 17 21:22:05 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * tweakfld.awk: move `rm' out into Makefile.in.
+ * eofsplit.awk: fixed buggy code so won't loop forever.
+ * Makefile.in (all): add unix-tests.
+ (unix-tests): new target, has pound-bang, fflush, getlnhd.
+ (basic): removed fflush, getlnhd.
+ (tweakfld): added rm from tweakfld.awk.
+
+Sun Oct 6 22:00:35 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (back89): new test case.
+ * back89.in, back89.ok: new files.
+
+Sun Oct 6 20:45:54 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (splitwht): new test case.
+ * splitwht.awk, splitwht.ok: new files.
+
+Sun Sep 29 23:14:20 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (gsubtest): new test case.
+ * gsubtest.awk, gsubtest.ok: new files.
+
+Fri Sep 20 11:58:40 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (prtoeval): new test case.
+ * prtoeval.awk, prtoeval.ok: new files.
+
+Tue Sep 10 06:26:44 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (gsubasgn): new test case.
+ * gsubasgn.awk, gsubasgn.ok: new files.
+
+Wed Aug 28 22:06:33 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * badargs.ok: updated output corresponding to change made to
+ main.c (see main ChangeLog).
+
+Thu Aug 1 07:20:28 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (clean): remove out[123] files from `messages' test.
+ Thanks to Pat Rankin (rankin@eql.caltech.edu).
+
+Sat Jul 27 23:56:57 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (prt1eval): new test case.
+ * prt1eval.awk, prt1eval.ok: new files.
+
+Mon Jul 22 22:06:10 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (eofsplit): new test case.
+ * eofsplit.awk, eofsplit.ok: new files.
+
+Sun Jul 14 07:07:45 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (fldchgnf): new test case.
+ * fldchgnf.awk, fldchgnf.ok: new files.
+
+Tue May 21 23:23:22 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (substr): new test case.
+ * substr.awk, substr.ok: new files.
+
+Tue May 14 15:05:23 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (dynlj): new test case.
+ * dynlj.awk, dynlj.ok: new files.
+
+Sun May 12 20:45:34 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (fnarray): new test case.
+ * fnarray.awk, fnarray.ok: new files.
+
+Fri Mar 15 06:46:48 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (clean): added `*~' to list of files to be removed.
+ * tweakfld.awk (END): added to do clean up action.
+
+Thu Mar 14 16:41:32 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (mmap8k): new test case.
+ * mmap8k.in, mmap8k.ok: new files.
+
+Sun Mar 10 22:58:35 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (clsflnam): new test case.
+ * clsflnam.in, clsflnam.awk, clsflnam.ok: new files.
+ * tweakfld.awk: changed to have a fixed date.
+
+Thu Mar 7 09:56:09 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (tweakfld): new test case.
+ * tweakfld.in, tweakfld.awk, tweakfld.ok: new files.
+
+Sun Mar 3 06:51:26 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (getlnhd, backgsub) : new test cases.
+ * getlnhd.awk, getlnhd.ok: new files.
+ * backgsub.in, backgsub.awk, backgsub.ok: new files.
+
+Mon Feb 26 22:30:02 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (sprintfc): new test case.
+ * sprintfc.in, sprintfc.awk, sprintfc.ok: new files.
+ * gensub.awk: updated for case of no match of regex.
+
+Wed Jan 24 10:06:16 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * Makefile.in (distclean, maintainer-clean): new targets.
+ (reindops): added test from Rick Adams (rick@uunet.uu.net).
+ (arrayparm, paramdup, defref, strftime, prmarscl, sclforin,
+ sclifin): Fix from Larry Schwimmer (schwim@cyclone.stanford.edu)
+ so that tests that are supposed to fail use `... || exit 0' to
+ cause a clean `make clean'.
+
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/contrib/awk/test/Makefile b/contrib/awk/test/Makefile
new file mode 100644
index 0000000..1a9168e
--- /dev/null
+++ b/contrib/awk/test/Makefile
@@ -0,0 +1,451 @@
+# Generated automatically from Makefile.in by configure.
+# Makefile for GNU Awk test suite.
+#
+# Copyright (C) 1988-1997 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+SHELL = /bin/sh
+AWK = ../gawk
+CMP = cmp
+
+srcdir = .
+
+bigtest: basic unix-tests gawk.extensions
+
+basic: msg swaplns messages argarray longwrds \
+ getline fstabplus compare arrayref rs fsrs rand \
+ fsbs negexp asgext anchgsub splitargv awkpath nfset reparse \
+ convfmt arrayparm paramdup nonl defref nofmtch litoct resplit \
+ rswhite prmarscl sclforin sclifin intprec childin noeffect \
+ numsubstr pcntplus prmreuse math fldchg fldchgnf reindops \
+ sprintfc backgsub tweakfld clsflnam mmap8k fnarray \
+ dynlj substr eofsplit prt1eval gsubasgn prtoeval gsubtest splitwht \
+ back89 tradanch nlfldsep splitvar intest nfldstr nors fnarydel \
+ noparms funstack clobber delarprm prdupval
+
+unix-tests: poundbang fflush getlnhd pipeio1 pipeio2 strftlng pid
+
+gawk.extensions: fieldwdth ignrcase posix manyfiles igncfs argtest \
+ badargs strftime gensub gnureops reint nondec
+
+extra: regtest inftest
+
+poundbang::
+ @cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@`
+ @rm -f /tmp/gawk
+ $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@`
+
+msg::
+ @echo 'Any output from "cmp" is bad news, although some differences'
+ @echo 'in floating point values are probably benign -- in particular,'
+ @echo 'some systems may omit a leading zero and the floating point'
+ @echo 'precision may lead to slightly different output in a few cases.'
+
+swaplns::
+ @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@
+ $(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@
+
+messages::
+ @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3
+ { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && \
+ $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3; } || \
+ { { test -d /dev/fd || test -d /proc/self/fd; } && \
+ echo IT IS OK THAT THIS TEST FAILED; }
+
+argarray::
+ @case $(srcdir) in \
+ .) : ;; \
+ *) cp $(srcdir)/argarray.in . ;; \
+ esac
+ @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@
+ $(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@
+
+fstabplus::
+ @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@
+ $(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@
+
+fsrs::
+ @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@
+ $(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@
+
+igncfs::
+ @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@
+ $(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@
+
+longwrds::
+ @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | sort >_$@
+ $(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@
+
+fieldwdth::
+ @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@
+ $(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@
+
+ignrcase::
+ @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@
+ $(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@
+
+regtest::
+ @echo 'Some of the output from regtest is very system specific, do not'
+ @echo 'be distressed if your output differs from that distributed.'
+ @echo 'Manual inspection is called for.'
+ AWK=`pwd`/$(AWK) $(srcdir)/regtest
+
+posix::
+ @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@
+ $(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@
+
+manyfiles::
+ @rm -rf junk
+ @mkdir junk
+ @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@
+ @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@
+ @echo "This number better be 1 ->" | tr -d '\012'
+ @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l
+ @rm -rf junk _$@
+
+compare::
+ @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@
+ $(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@
+
+arrayref::
+ @$(AWK) -f $(srcdir)/arrayref.awk >_$@
+ $(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@
+
+rs::
+ @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@
+ $(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@
+
+fsbs::
+ @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@
+ $(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@
+
+inftest::
+ @echo This test is very machine specific...
+ @$(AWK) -f $(srcdir)/inftest.awk >_$@
+ $(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@
+
+getline::
+ @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@
+ $(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@
+
+rand::
+ @$(AWK) -f $(srcdir)/rand.awk >_$@
+ $(CMP) $(srcdir)/rand.ok _$@ && rm -f _$@
+
+negexp::
+ @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@
+ $(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@
+
+asgext::
+ @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@
+ $(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@
+
+anchgsub::
+ @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@
+ $(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@
+
+splitargv::
+ @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@
+ $(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@
+
+awkpath::
+ @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@
+ $(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@
+
+nfset::
+ @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@
+ $(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@
+
+reparse::
+ @$(AWK) -f $(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@
+ $(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@
+
+argtest::
+ @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@
+ $(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@
+
+badargs::
+ @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@
+ $(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@
+
+convfmt::
+ @$(AWK) -f $(srcdir)/convfmt.awk >_$@
+ $(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@
+
+arrayparm::
+ @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@
+
+paramdup::
+ @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@
+
+nonl::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1
+ $(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@
+
+defref::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@
+
+nofmtch::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@
+
+strftime::
+ : this test could fail on slow machines or on a second boundary,
+ : so if it does, double check the actual results
+ @LC_ALL=C; export LC_ALL; LANG=C; export LANG; \
+ date | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; \
+ print > "strftime.ok" ; \
+ print strftime() > "'_$@'" }'
+ $(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok || exit 0
+
+litoct::
+ @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@
+ $(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@
+
+gensub::
+ @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@
+ $(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@
+
+resplit::
+ @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@
+ $(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@
+
+rswhite::
+ @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@
+ $(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@
+
+prmarscl::
+ @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/prmarscl.ok _$@ && rm -f _$@
+
+sclforin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@
+
+sclifin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@
+
+intprec::
+ @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@
+
+childin::
+ @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@
+ $(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@
+
+noeffect::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@
+
+numsubstr::
+ @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@
+ $(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@
+
+gnureops::
+ @$(AWK) -f $(srcdir)/gnureops.awk >_$@
+ $(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@
+
+pcntplus::
+ @$(AWK) -f $(srcdir)/pcntplus.awk >_$@
+ $(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@
+
+prmreuse::
+ @$(AWK) -f $(srcdir)/prmreuse.awk >_$@
+ $(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@
+
+math::
+ @$(AWK) -f $(srcdir)/math.awk >_$@
+ $(CMP) $(srcdir)/math.ok _$@ && rm -f _$@
+
+fflush::
+ @$(srcdir)/fflush.sh >_$@
+ $(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@
+
+fldchg::
+ @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@
+ $(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@
+
+fldchgnf::
+ @$(AWK) -f $(srcdir)/fldchgnf.awk $(srcdir)/fldchgnf.in >_$@
+ $(CMP) $(srcdir)/fldchgnf.ok _$@ && rm -f _$@
+
+reindops::
+ @$(AWK) -f $(srcdir)/reindops.awk $(srcdir)/reindops.in >_$@
+ $(CMP) $(srcdir)/reindops.ok _$@ && rm -f _$@
+
+sprintfc::
+ @$(AWK) -f $(srcdir)/sprintfc.awk $(srcdir)/sprintfc.in >_$@
+ $(CMP) $(srcdir)/sprintfc.ok _$@ && rm -f _$@
+
+getlnhd::
+ @$(AWK) -f $(srcdir)/getlnhd.awk >_$@
+ $(CMP) $(srcdir)/getlnhd.ok _$@ && rm -f _$@
+
+backgsub::
+ @$(AWK) -f $(srcdir)/backgsub.awk $(srcdir)/backgsub.in >_$@
+ $(CMP) $(srcdir)/backgsub.ok _$@ && rm -f _$@
+
+tweakfld::
+ @$(AWK) -f $(srcdir)/tweakfld.awk $(srcdir)/tweakfld.in >_$@
+ @rm -f errors.cleanup
+ $(CMP) $(srcdir)/tweakfld.ok _$@ && rm -f _$@
+
+clsflnam::
+ @$(AWK) -f $(srcdir)/clsflnam.awk $(srcdir)/clsflnam.in >_$@
+ $(CMP) $(srcdir)/clsflnam.ok _$@ && rm -f _$@
+
+mmap8k::
+ @$(AWK) '{ print }' $(srcdir)/mmap8k.in >_$@
+ $(CMP) $(srcdir)/mmap8k.in _$@ && rm -f _$@
+
+fnarray::
+ @-AWKPATH=$(srcdir) $(AWK) -f fnarray.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/fnarray.ok _$@ && rm -f _$@
+
+dynlj::
+ @$(AWK) -f $(srcdir)/dynlj.awk >_$@
+ $(CMP) $(srcdir)/dynlj.ok _$@ && rm -f _$@
+
+substr::
+ @$(AWK) -f $(srcdir)/substr.awk >_$@
+ $(CMP) $(srcdir)/substr.ok _$@ && rm -f _$@
+
+eofsplit::
+ @$(AWK) -f $(srcdir)/eofsplit.awk >_$@
+ $(CMP) $(srcdir)/eofsplit.ok _$@ && rm -f _$@
+
+prt1eval::
+ @$(AWK) -f $(srcdir)/prt1eval.awk >_$@
+ $(CMP) $(srcdir)/prt1eval.ok _$@ && rm -f _$@
+
+gsubasgn::
+ @-AWKPATH=$(srcdir) $(AWK) -f gsubasgn.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/gsubasgn.ok _$@ && rm -f _$@
+
+prtoeval::
+ @$(AWK) -f $(srcdir)/prtoeval.awk >_$@
+ $(CMP) $(srcdir)/prtoeval.ok _$@ && rm -f _$@
+
+gsubtest::
+ @$(AWK) -f $(srcdir)/gsubtest.awk >_$@
+ $(CMP) $(srcdir)/gsubtest.ok _$@ && rm -f _$@
+
+splitwht::
+ @$(AWK) -f $(srcdir)/splitwht.awk >_$@
+ $(CMP) $(srcdir)/splitwht.ok _$@ && rm -f _$@
+
+back89::
+ @$(AWK) '/a\8b/' $(srcdir)/back89.in >_$@
+ $(CMP) $(srcdir)/back89.ok _$@ && rm -f _$@
+
+tradanch::
+ @$(AWK) --traditional -f $(srcdir)/tradanch.awk $(srcdir)/tradanch.in >_$@
+ $(CMP) $(srcdir)/tradanch.ok _$@ && rm -f _$@
+
+nlfldsep::
+ @$(AWK) -f $(srcdir)/nlfldsep.awk $(srcdir)/nlfldsep.in > _$@
+ $(CMP) $(srcdir)/nlfldsep.ok _$@ && rm -f _$@
+
+splitvar::
+ @$(AWK) -f $(srcdir)/splitvar.awk $(srcdir)/splitvar.in >_$@
+ $(CMP) $(srcdir)/splitvar.ok _$@ && rm -f _$@
+
+intest::
+ @$(AWK) -f $(srcdir)/intest.awk >_$@
+ $(CMP) $(srcdir)/intest.ok _$@ && rm -f _$@
+
+# AIX /bin/sh exec's the last command in a list, therefore issue a ":"
+# command so that pid.sh is fork'ed as a child before being exec'ed.
+pid::
+ @AWKPATH=$(srcdir) AWK=$(AWK) $(SHELL) $(srcdir)/pid.sh $$$$ > _`basename $@` ; :
+ $(CMP) $(srcdir)/pid.ok _`basename $@` && rm -f _`basename $@` _`basename $@`.in
+
+strftlng::
+ @TZ=UTC; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@
+ @if $(CMP) -s $(srcdir)/strftlng.ok _$@ ; then : ; else \
+ TZ=UTC0; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ ; \
+ fi
+ $(CMP) $(srcdir)/strftlng.ok _$@ && rm -f _$@
+
+nfldstr::
+ @echo | $(AWK) '$$1 == 0 { print "bug" }' > _$@
+ $(CMP) $(srcdir)/nfldstr.ok _$@ && rm -f _$@
+
+nors::
+ @echo A B C D E | tr -d '\12' | $(AWK) '{ print $$NF }' - $(srcdir)/nors.in > _$@
+ $(CMP) $(srcdir)/nors.ok _$@ && rm -f _$@
+
+fnarydel::
+ @$(AWK) -f $(srcdir)/fnarydel.awk >_$@
+ $(CMP) $(srcdir)/fnarydel.ok _$@ && rm -f _$@
+
+reint::
+ @$(AWK) --re-interval -f $(srcdir)/reint.awk $(srcdir)/reint.in >_$@
+ $(CMP) $(srcdir)/reint.ok _$@ && rm -f _$@
+
+noparms::
+ @-AWKPATH=$(srcdir) $(AWK) -f noparms.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/noparms.ok _$@ && rm -f _$@
+
+pipeio1::
+ @$(AWK) -f $(srcdir)/pipeio1.awk >_$@
+ @rm -f test1 test2
+ $(CMP) $(srcdir)/pipeio1.ok _$@ && rm -f _$@
+
+pipeio2::
+ @$(AWK) -v SRCDIR=$(srcdir) -f $(srcdir)/pipeio2.awk >_$@
+ $(CMP) $(srcdir)/pipeio2.ok _$@ && rm -f _$@
+
+funstack::
+ @$(AWK) -f $(srcdir)/funstack.awk $(srcdir)/funstack.in >_$@
+ $(CMP) $(srcdir)/funstack.ok _$@ && rm -f _$@
+
+clobber::
+ @$(AWK) -f $(srcdir)/clobber.awk >_$@
+ $(CMP) $(srcdir)/clobber.ok seq && $(CMP) $(srcdir)/clobber.ok _$@ && rm -f _$@
+ @rm -f seq
+
+delarprm::
+ @$(AWK) -f $(srcdir)/delarprm.awk >_$@
+ $(CMP) $(srcdir)/delarprm.ok _$@ && rm -f _$@
+
+prdupval::
+ @$(AWK) -f $(srcdir)/prdupval.awk $(srcdir)/prdupval.in >_$@
+ $(CMP) $(srcdir)/prdupval.ok _$@ && rm -f _$@
+
+nondec::
+ @if grep BITOP ../config.h | grep define > /dev/null; \
+ then \
+ $(AWK) -f $(srcdir)/nondec.awk >_$@; \
+ else \
+ cp $(srcdir)/nondec.ok _$@; \
+ fi
+ $(CMP) $(srcdir)/nondec.ok _$@ && rm -f _$@
+
+clean:
+ rm -fr _* core junk out1 out2 out3 strftime.ok test1 test2 seq *~
+
+distclean: clean
+ rm -f Makefile
+
+maintainer-clean: distclean
diff --git a/contrib/awk/test/Makefile.in b/contrib/awk/test/Makefile.in
new file mode 100644
index 0000000..07d0cbc
--- /dev/null
+++ b/contrib/awk/test/Makefile.in
@@ -0,0 +1,451 @@
+# Makefile for GNU Awk test suite.
+#
+# Copyright (C) 1988-1997 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+SHELL = /bin/sh
+AWK = ../gawk
+CMP = cmp
+
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+bigtest: basic unix-tests gawk.extensions
+
+basic: msg swaplns messages argarray longwrds \
+ getline fstabplus compare arrayref rs fsrs rand \
+ fsbs negexp asgext anchgsub splitargv awkpath nfset reparse \
+ convfmt arrayparm paramdup nonl defref nofmtch litoct resplit \
+ rswhite prmarscl sclforin sclifin intprec childin noeffect \
+ numsubstr pcntplus prmreuse math fldchg fldchgnf reindops \
+ sprintfc backgsub tweakfld clsflnam mmap8k fnarray \
+ dynlj substr eofsplit prt1eval gsubasgn prtoeval gsubtest splitwht \
+ back89 tradanch nlfldsep splitvar intest nfldstr nors fnarydel \
+ noparms funstack clobber delarprm prdupval
+
+unix-tests: poundbang fflush getlnhd pipeio1 pipeio2 strftlng pid
+
+gawk.extensions: fieldwdth ignrcase posix manyfiles igncfs argtest \
+ badargs strftime gensub gnureops reint nondec
+
+extra: regtest inftest
+
+poundbang::
+ @cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@`
+ @rm -f /tmp/gawk
+ $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@`
+
+msg::
+ @echo 'Any output from "cmp" is bad news, although some differences'
+ @echo 'in floating point values are probably benign -- in particular,'
+ @echo 'some systems may omit a leading zero and the floating point'
+ @echo 'precision may lead to slightly different output in a few cases.'
+
+swaplns::
+ @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@
+ $(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@
+
+messages::
+ @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3
+ { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && \
+ $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3; } || \
+ { { test -d /dev/fd || test -d /proc/self/fd; } && \
+ echo IT IS OK THAT THIS TEST FAILED; }
+
+argarray::
+ @case $(srcdir) in \
+ .) : ;; \
+ *) cp $(srcdir)/argarray.in . ;; \
+ esac
+ @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@
+ $(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@
+
+fstabplus::
+ @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@
+ $(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@
+
+fsrs::
+ @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@
+ $(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@
+
+igncfs::
+ @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@
+ $(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@
+
+longwrds::
+ @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | sort >_$@
+ $(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@
+
+fieldwdth::
+ @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@
+ $(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@
+
+ignrcase::
+ @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@
+ $(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@
+
+regtest::
+ @echo 'Some of the output from regtest is very system specific, do not'
+ @echo 'be distressed if your output differs from that distributed.'
+ @echo 'Manual inspection is called for.'
+ AWK=`pwd`/$(AWK) $(srcdir)/regtest
+
+posix::
+ @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@
+ $(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@
+
+manyfiles::
+ @rm -rf junk
+ @mkdir junk
+ @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@
+ @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@
+ @echo "This number better be 1 ->" | tr -d '\012'
+ @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l
+ @rm -rf junk _$@
+
+compare::
+ @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@
+ $(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@
+
+arrayref::
+ @$(AWK) -f $(srcdir)/arrayref.awk >_$@
+ $(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@
+
+rs::
+ @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@
+ $(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@
+
+fsbs::
+ @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@
+ $(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@
+
+inftest::
+ @echo This test is very machine specific...
+ @$(AWK) -f $(srcdir)/inftest.awk >_$@
+ $(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@
+
+getline::
+ @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@
+ $(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@
+
+rand::
+ @$(AWK) -f $(srcdir)/rand.awk >_$@
+ $(CMP) $(srcdir)/rand.ok _$@ && rm -f _$@
+
+negexp::
+ @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@
+ $(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@
+
+asgext::
+ @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@
+ $(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@
+
+anchgsub::
+ @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@
+ $(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@
+
+splitargv::
+ @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@
+ $(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@
+
+awkpath::
+ @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@
+ $(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@
+
+nfset::
+ @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@
+ $(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@
+
+reparse::
+ @$(AWK) -f $(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@
+ $(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@
+
+argtest::
+ @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@
+ $(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@
+
+badargs::
+ @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@
+ $(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@
+
+convfmt::
+ @$(AWK) -f $(srcdir)/convfmt.awk >_$@
+ $(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@
+
+arrayparm::
+ @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@
+
+paramdup::
+ @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@
+
+nonl::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1
+ $(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@
+
+defref::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@
+
+nofmtch::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@
+
+strftime::
+ : this test could fail on slow machines or on a second boundary,
+ : so if it does, double check the actual results
+ @LC_ALL=C; export LC_ALL; LANG=C; export LANG; \
+ date | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; \
+ print > "strftime.ok" ; \
+ print strftime() > "'_$@'" }'
+ $(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok || exit 0
+
+litoct::
+ @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@
+ $(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@
+
+gensub::
+ @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@
+ $(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@
+
+resplit::
+ @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@
+ $(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@
+
+rswhite::
+ @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@
+ $(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@
+
+prmarscl::
+ @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/prmarscl.ok _$@ && rm -f _$@
+
+sclforin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@
+
+sclifin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@
+
+intprec::
+ @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@
+
+childin::
+ @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@
+ $(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@
+
+noeffect::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@
+
+numsubstr::
+ @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@
+ $(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@
+
+gnureops::
+ @$(AWK) -f $(srcdir)/gnureops.awk >_$@
+ $(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@
+
+pcntplus::
+ @$(AWK) -f $(srcdir)/pcntplus.awk >_$@
+ $(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@
+
+prmreuse::
+ @$(AWK) -f $(srcdir)/prmreuse.awk >_$@
+ $(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@
+
+math::
+ @$(AWK) -f $(srcdir)/math.awk >_$@
+ $(CMP) $(srcdir)/math.ok _$@ && rm -f _$@
+
+fflush::
+ @$(srcdir)/fflush.sh >_$@
+ $(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@
+
+fldchg::
+ @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@
+ $(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@
+
+fldchgnf::
+ @$(AWK) -f $(srcdir)/fldchgnf.awk $(srcdir)/fldchgnf.in >_$@
+ $(CMP) $(srcdir)/fldchgnf.ok _$@ && rm -f _$@
+
+reindops::
+ @$(AWK) -f $(srcdir)/reindops.awk $(srcdir)/reindops.in >_$@
+ $(CMP) $(srcdir)/reindops.ok _$@ && rm -f _$@
+
+sprintfc::
+ @$(AWK) -f $(srcdir)/sprintfc.awk $(srcdir)/sprintfc.in >_$@
+ $(CMP) $(srcdir)/sprintfc.ok _$@ && rm -f _$@
+
+getlnhd::
+ @$(AWK) -f $(srcdir)/getlnhd.awk >_$@
+ $(CMP) $(srcdir)/getlnhd.ok _$@ && rm -f _$@
+
+backgsub::
+ @$(AWK) -f $(srcdir)/backgsub.awk $(srcdir)/backgsub.in >_$@
+ $(CMP) $(srcdir)/backgsub.ok _$@ && rm -f _$@
+
+tweakfld::
+ @$(AWK) -f $(srcdir)/tweakfld.awk $(srcdir)/tweakfld.in >_$@
+ @rm -f errors.cleanup
+ $(CMP) $(srcdir)/tweakfld.ok _$@ && rm -f _$@
+
+clsflnam::
+ @$(AWK) -f $(srcdir)/clsflnam.awk $(srcdir)/clsflnam.in >_$@
+ $(CMP) $(srcdir)/clsflnam.ok _$@ && rm -f _$@
+
+mmap8k::
+ @$(AWK) '{ print }' $(srcdir)/mmap8k.in >_$@
+ $(CMP) $(srcdir)/mmap8k.in _$@ && rm -f _$@
+
+fnarray::
+ @-AWKPATH=$(srcdir) $(AWK) -f fnarray.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/fnarray.ok _$@ && rm -f _$@
+
+dynlj::
+ @$(AWK) -f $(srcdir)/dynlj.awk >_$@
+ $(CMP) $(srcdir)/dynlj.ok _$@ && rm -f _$@
+
+substr::
+ @$(AWK) -f $(srcdir)/substr.awk >_$@
+ $(CMP) $(srcdir)/substr.ok _$@ && rm -f _$@
+
+eofsplit::
+ @$(AWK) -f $(srcdir)/eofsplit.awk >_$@
+ $(CMP) $(srcdir)/eofsplit.ok _$@ && rm -f _$@
+
+prt1eval::
+ @$(AWK) -f $(srcdir)/prt1eval.awk >_$@
+ $(CMP) $(srcdir)/prt1eval.ok _$@ && rm -f _$@
+
+gsubasgn::
+ @-AWKPATH=$(srcdir) $(AWK) -f gsubasgn.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/gsubasgn.ok _$@ && rm -f _$@
+
+prtoeval::
+ @$(AWK) -f $(srcdir)/prtoeval.awk >_$@
+ $(CMP) $(srcdir)/prtoeval.ok _$@ && rm -f _$@
+
+gsubtest::
+ @$(AWK) -f $(srcdir)/gsubtest.awk >_$@
+ $(CMP) $(srcdir)/gsubtest.ok _$@ && rm -f _$@
+
+splitwht::
+ @$(AWK) -f $(srcdir)/splitwht.awk >_$@
+ $(CMP) $(srcdir)/splitwht.ok _$@ && rm -f _$@
+
+back89::
+ @$(AWK) '/a\8b/' $(srcdir)/back89.in >_$@
+ $(CMP) $(srcdir)/back89.ok _$@ && rm -f _$@
+
+tradanch::
+ @$(AWK) --traditional -f $(srcdir)/tradanch.awk $(srcdir)/tradanch.in >_$@
+ $(CMP) $(srcdir)/tradanch.ok _$@ && rm -f _$@
+
+nlfldsep::
+ @$(AWK) -f $(srcdir)/nlfldsep.awk $(srcdir)/nlfldsep.in > _$@
+ $(CMP) $(srcdir)/nlfldsep.ok _$@ && rm -f _$@
+
+splitvar::
+ @$(AWK) -f $(srcdir)/splitvar.awk $(srcdir)/splitvar.in >_$@
+ $(CMP) $(srcdir)/splitvar.ok _$@ && rm -f _$@
+
+intest::
+ @$(AWK) -f $(srcdir)/intest.awk >_$@
+ $(CMP) $(srcdir)/intest.ok _$@ && rm -f _$@
+
+# AIX /bin/sh exec's the last command in a list, therefore issue a ":"
+# command so that pid.sh is fork'ed as a child before being exec'ed.
+pid::
+ @AWKPATH=$(srcdir) AWK=$(AWK) $(SHELL) $(srcdir)/pid.sh $$$$ > _`basename $@` ; :
+ $(CMP) $(srcdir)/pid.ok _`basename $@` && rm -f _`basename $@` _`basename $@`.in
+
+strftlng::
+ @TZ=UTC; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@
+ @if $(CMP) -s $(srcdir)/strftlng.ok _$@ ; then : ; else \
+ TZ=UTC0; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ ; \
+ fi
+ $(CMP) $(srcdir)/strftlng.ok _$@ && rm -f _$@
+
+nfldstr::
+ @echo | $(AWK) '$$1 == 0 { print "bug" }' > _$@
+ $(CMP) $(srcdir)/nfldstr.ok _$@ && rm -f _$@
+
+nors::
+ @echo A B C D E | tr -d '\12' | $(AWK) '{ print $$NF }' - $(srcdir)/nors.in > _$@
+ $(CMP) $(srcdir)/nors.ok _$@ && rm -f _$@
+
+fnarydel::
+ @$(AWK) -f $(srcdir)/fnarydel.awk >_$@
+ $(CMP) $(srcdir)/fnarydel.ok _$@ && rm -f _$@
+
+reint::
+ @$(AWK) --re-interval -f $(srcdir)/reint.awk $(srcdir)/reint.in >_$@
+ $(CMP) $(srcdir)/reint.ok _$@ && rm -f _$@
+
+noparms::
+ @-AWKPATH=$(srcdir) $(AWK) -f noparms.awk >_$@ 2>&1 || exit 0
+ $(CMP) $(srcdir)/noparms.ok _$@ && rm -f _$@
+
+pipeio1::
+ @$(AWK) -f $(srcdir)/pipeio1.awk >_$@
+ @rm -f test1 test2
+ $(CMP) $(srcdir)/pipeio1.ok _$@ && rm -f _$@
+
+pipeio2::
+ @$(AWK) -v SRCDIR=$(srcdir) -f $(srcdir)/pipeio2.awk >_$@
+ $(CMP) $(srcdir)/pipeio2.ok _$@ && rm -f _$@
+
+funstack::
+ @$(AWK) -f $(srcdir)/funstack.awk $(srcdir)/funstack.in >_$@
+ $(CMP) $(srcdir)/funstack.ok _$@ && rm -f _$@
+
+clobber::
+ @$(AWK) -f $(srcdir)/clobber.awk >_$@
+ $(CMP) $(srcdir)/clobber.ok seq && $(CMP) $(srcdir)/clobber.ok _$@ && rm -f _$@
+ @rm -f seq
+
+delarprm::
+ @$(AWK) -f $(srcdir)/delarprm.awk >_$@
+ $(CMP) $(srcdir)/delarprm.ok _$@ && rm -f _$@
+
+prdupval::
+ @$(AWK) -f $(srcdir)/prdupval.awk $(srcdir)/prdupval.in >_$@
+ $(CMP) $(srcdir)/prdupval.ok _$@ && rm -f _$@
+
+nondec::
+ @if grep BITOP ../config.h | grep define > /dev/null; \
+ then \
+ $(AWK) -f $(srcdir)/nondec.awk >_$@; \
+ else \
+ cp $(srcdir)/nondec.ok _$@; \
+ fi
+ $(CMP) $(srcdir)/nondec.ok _$@ && rm -f _$@
+
+clean:
+ rm -fr _* core junk out1 out2 out3 strftime.ok test1 test2 seq *~
+
+distclean: clean
+ rm -f Makefile
+
+maintainer-clean: distclean
diff --git a/contrib/awk/test/README b/contrib/awk/test/README
new file mode 100644
index 0000000..2343be2
--- /dev/null
+++ b/contrib/awk/test/README
@@ -0,0 +1,18 @@
+Mon Jan 22 13:08:58 EST 1996
+
+This directory contains the tests for gawk. The tests use the
+following conventions.
+
+Given some aspect of gawk named `foo', there will be one or more
+of the following files:
+
+foo.awk --- actual code for the test if not inline in the Makefile
+foo.in --- the data for the test, if it needs data
+foo.ok --- the expected results
+_foo --- the actual results; generated at run time
+
+The _foo file will be left around if a test fails, allowing you to
+compare actual and expected results, in case they differ.
+
+If they do differ (other than strftime.ok and _strftime!), send in a
+bug report. See the manual for the bug report procedure.
diff --git a/contrib/awk/test/anchgsub.awk b/contrib/awk/test/anchgsub.awk
new file mode 100644
index 0000000..52e8aa4
--- /dev/null
+++ b/contrib/awk/test/anchgsub.awk
@@ -0,0 +1 @@
+{ gsub(/^[ ]*/, "", $0) ; print }
diff --git a/contrib/awk/test/anchgsub.in b/contrib/awk/test/anchgsub.in
new file mode 100644
index 0000000..b829d84
--- /dev/null
+++ b/contrib/awk/test/anchgsub.in
@@ -0,0 +1 @@
+ This is a test, this is only a test.
diff --git a/contrib/awk/test/anchgsub.ok b/contrib/awk/test/anchgsub.ok
new file mode 100644
index 0000000..c33dfb9
--- /dev/null
+++ b/contrib/awk/test/anchgsub.ok
@@ -0,0 +1 @@
+This is a test, this is only a test.
diff --git a/contrib/awk/test/argarray.awk b/contrib/awk/test/argarray.awk
new file mode 100644
index 0000000..1960f9bd
--- /dev/null
+++ b/contrib/awk/test/argarray.awk
@@ -0,0 +1,14 @@
+BEGIN {
+ argn = " argument" (ARGC > 1 ? "s" : "")
+ are = ARGC > 1 ? "are" : "is"
+ print "here we have " ARGC argn
+ print "which " are
+ for (x = 0; x < ARGC; x++)
+ print "\t", ARGV[x]
+ print "Environment variable TEST=" ENVIRON["TEST"]
+ print "and the current input file is called \"" FILENAME "\""
+}
+
+FNR == 1 {
+ print "in main loop, this input file is known as \"" FILENAME "\""
+}
diff --git a/contrib/awk/test/argarray.in b/contrib/awk/test/argarray.in
new file mode 100644
index 0000000..bc93338
--- /dev/null
+++ b/contrib/awk/test/argarray.in
@@ -0,0 +1 @@
+this is a simple test file
diff --git a/contrib/awk/test/argarray.ok b/contrib/awk/test/argarray.ok
new file mode 100644
index 0000000..18eb841
--- /dev/null
+++ b/contrib/awk/test/argarray.ok
@@ -0,0 +1,9 @@
+here we have 3 arguments
+which are
+ gawk
+ ./argarray.in
+ -
+Environment variable TEST=
+and the current input file is called ""
+in main loop, this input file is known as "./argarray.in"
+in main loop, this input file is known as "-"
diff --git a/contrib/awk/test/argtest.awk b/contrib/awk/test/argtest.awk
new file mode 100644
index 0000000..e7a1145
--- /dev/null
+++ b/contrib/awk/test/argtest.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ for (i = 0; i < ARGC; i++)
+ printf("ARGV[%d] = %s\n", i, ARGV[i])
+}
diff --git a/contrib/awk/test/argtest.ok b/contrib/awk/test/argtest.ok
new file mode 100644
index 0000000..591bc64
--- /dev/null
+++ b/contrib/awk/test/argtest.ok
@@ -0,0 +1,4 @@
+ARGV[0] = gawk
+ARGV[1] = -x
+ARGV[2] = -y
+ARGV[3] = abc
diff --git a/contrib/awk/test/arrayparm.awk b/contrib/awk/test/arrayparm.awk
new file mode 100644
index 0000000..d6f34d9
--- /dev/null
+++ b/contrib/awk/test/arrayparm.awk
@@ -0,0 +1,21 @@
+#
+# Test program from:
+#
+# Date: Tue, 21 Feb 95 16:09:29 EST
+# From: emory!blackhawk.com!aaron (Aaron Sosnick)
+#
+BEGIN {
+ foo[1]=1;
+ foo[2]=2;
+ bug1(foo);
+}
+function bug1(i) {
+ for (i in foo) {
+ bug2(i);
+ delete foo[i];
+ print i,1,bot[1];
+ }
+}
+function bug2(arg) {
+ bot[arg]=arg;
+}
diff --git a/contrib/awk/test/arrayparm.ok b/contrib/awk/test/arrayparm.ok
new file mode 100644
index 0000000..b315f7c
--- /dev/null
+++ b/contrib/awk/test/arrayparm.ok
@@ -0,0 +1 @@
+gawk: arrayparm.awk:18: fatal: attempt to use array `foo' in a scalar context
diff --git a/contrib/awk/test/arrayref.awk b/contrib/awk/test/arrayref.awk
new file mode 100644
index 0000000..144d41a
--- /dev/null
+++ b/contrib/awk/test/arrayref.awk
@@ -0,0 +1,13 @@
+ BEGIN { # foo[10] = 0 # put this line in and it will work
+ test(foo); print foo[1]
+ test2(foo2); print foo2[1]
+ }
+
+ function test(foo)
+ {
+ test2(foo)
+ }
+ function test2(bar)
+ {
+ bar[1] = 1
+ }
diff --git a/contrib/awk/test/arrayref.ok b/contrib/awk/test/arrayref.ok
new file mode 100644
index 0000000..6ed281c
--- /dev/null
+++ b/contrib/awk/test/arrayref.ok
@@ -0,0 +1,2 @@
+1
+1
diff --git a/contrib/awk/test/asgext.awk b/contrib/awk/test/asgext.awk
new file mode 100644
index 0000000..c7f1775
--- /dev/null
+++ b/contrib/awk/test/asgext.awk
@@ -0,0 +1 @@
+{ print $3; $4 = "a"; print }
diff --git a/contrib/awk/test/asgext.in b/contrib/awk/test/asgext.in
new file mode 100644
index 0000000..3743b5b
--- /dev/null
+++ b/contrib/awk/test/asgext.in
@@ -0,0 +1,3 @@
+1 2 3
+1
+1 2 3 4
diff --git a/contrib/awk/test/asgext.ok b/contrib/awk/test/asgext.ok
new file mode 100644
index 0000000..2c0df70
--- /dev/null
+++ b/contrib/awk/test/asgext.ok
@@ -0,0 +1,6 @@
+3
+1 2 3 a
+
+1 a
+3
+1 2 3 a
diff --git a/contrib/awk/test/awkpath.ok b/contrib/awk/test/awkpath.ok
new file mode 100644
index 0000000..6cffe1b
--- /dev/null
+++ b/contrib/awk/test/awkpath.ok
@@ -0,0 +1 @@
+Found it.
diff --git a/contrib/awk/test/back89.in b/contrib/awk/test/back89.in
new file mode 100644
index 0000000..b0a88f2
--- /dev/null
+++ b/contrib/awk/test/back89.in
@@ -0,0 +1,2 @@
+a8b
+a\8b
diff --git a/contrib/awk/test/back89.ok b/contrib/awk/test/back89.ok
new file mode 100644
index 0000000..e9ea4d5
--- /dev/null
+++ b/contrib/awk/test/back89.ok
@@ -0,0 +1 @@
+a8b
diff --git a/contrib/awk/test/backgsub.awk b/contrib/awk/test/backgsub.awk
new file mode 100644
index 0000000..bec7354
--- /dev/null
+++ b/contrib/awk/test/backgsub.awk
@@ -0,0 +1,4 @@
+{
+ gsub( "\\\\", "\\\\")
+ print
+}
diff --git a/contrib/awk/test/backgsub.in b/contrib/awk/test/backgsub.in
new file mode 100644
index 0000000..2d3f17f
--- /dev/null
+++ b/contrib/awk/test/backgsub.in
@@ -0,0 +1 @@
+\x\y\z
diff --git a/contrib/awk/test/backgsub.ok b/contrib/awk/test/backgsub.ok
new file mode 100644
index 0000000..e2e265f
--- /dev/null
+++ b/contrib/awk/test/backgsub.ok
@@ -0,0 +1 @@
+\\x\\y\\z
diff --git a/contrib/awk/test/badargs.ok b/contrib/awk/test/badargs.ok
new file mode 100644
index 0000000..c89e520
--- /dev/null
+++ b/contrib/awk/test/badargs.ok
@@ -0,0 +1,23 @@
+gawk: option requires an argument -- f
+Usage: gawk [POSIX or GNU style options] -f progfile [--] file ...
+ gawk [POSIX or GNU style options] [--] 'program' file ...
+POSIX options: GNU long options:
+ -f progfile --file=progfile
+ -F fs --field-separator=fs
+ -v var=val --assign=var=val
+ -m[fr] val
+ -W compat --compat
+ -W copyleft --copyleft
+ -W copyright --copyright
+ -W help --help
+ -W lint --lint
+ -W lint-old --lint-old
+ -W posix --posix
+ -W re-interval --re-interval
+ -W source=program-text --source=program-text
+ -W traditional --traditional
+ -W usage --usage
+ -W version --version
+
+Report bugs to bug-gnu-utils@prep.ai.mit.edu,
+with a Cc: to arnold@gnu.ai.mit.edu
diff --git a/contrib/awk/test/childin.ok b/contrib/awk/test/childin.ok
new file mode 100644
index 0000000..45b983b
--- /dev/null
+++ b/contrib/awk/test/childin.ok
@@ -0,0 +1 @@
+hi
diff --git a/contrib/awk/test/clobber.awk b/contrib/awk/test/clobber.awk
new file mode 100644
index 0000000..d6635f2
--- /dev/null
+++ b/contrib/awk/test/clobber.awk
@@ -0,0 +1,98 @@
+BEGIN {
+ print "000800" > "seq"
+ close("seq")
+ ARGV[1] = "seq"
+ ARGC = 2
+}
+
+{ printf "%06d", $1 + 1 >"seq";
+ printf "%06d", $1 + 1 }
+# Date: Mon, 20 Jan 1997 15:14:06 -0600 (CST)
+# From: Dave Bodenstab <emory!synet.net!imdave>
+# To: bug-gnu-utils@prep.ai.mit.edu
+# Subject: GNU awk 3.0.2 core dump
+# Cc: arnold@gnu.ai.mit.edu
+#
+# The following program produces a core file on my FreeBSD system:
+#
+# bash$ echo 000800 >/tmp/seq
+# bash$ gawk '{ printf "%06d", $1 + 1 >"/tmp/seq";
+# printf "%06d", $1 + 1 }' /tmp/seq
+#
+# This fragment comes from mgetty+sendfax.
+#
+# Here is the trace:
+#
+# Script started on Mon Jan 20 15:09:04 1997
+# bash$ gawk --version
+# GNU Awk 3.0.2
+# Copyright (C) 1989, 1991-1996 Free Software Foundation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# bash$ gdb gawk
+# GDB is free software and you are welcome to distribute copies of it
+# under certain conditions; type "show copying" to see the conditions.
+# There is absolutely no warranty for GDB; type "show warranty" for details.
+# GDB 4.13 (i386-unknown-freebsd),
+# Copyright 1994 Free Software Foundation, Inc...
+# (gdb) shell echo 000800 >/tmp/seq
+# (gdb) r '{ printf "%06d", $1 + 1 >"/tmp/seq"; printf "%06d", $1 + 1 }' /tmp/seq
+# Starting program: /scratch/archive/src/cmd/gnuawk-3.0.2/gawk '{ printf "%06d", $1 + 1 >"/tmp/seq"; printf "%06d", $1 + 1 }' /tmp/seq
+#
+# Program received signal SIGBUS, Bus error.
+# 0xd86f in def_parse_field (up_to=1, buf=0x37704, len=6, fs=0x3b240, rp=0x0,
+# set=0xce6c <set_field>, n=0x0) at field.c:391
+# 391 sav = *end;
+# (gdb) bt
+# #0 0xd86f in def_parse_field (up_to=1, buf=0x37704, len=6, fs=0x3b240,
+# rp=0x0, set=0xce6c <set_field>, n=0x0) at field.c:391
+# #1 0xddb1 in get_field (requested=1, assign=0x0) at field.c:669
+# #2 0xc25d in r_get_lhs (ptr=0x3b9b4, assign=0x0) at eval.c:1339
+# #3 0x9ab0 in r_tree_eval (tree=0x3b9b4, iscond=0) at eval.c:604
+# #4 0xa5f1 in r_tree_eval (tree=0x3b9fc, iscond=0) at eval.c:745
+# #5 0x4661 in format_tree (fmt_string=0x3e040 "%06d", n0=0, carg=0x3ba20)
+# at builtin.c:620
+# #6 0x5beb in do_sprintf (tree=0x3b96c) at builtin.c:809
+# #7 0x5cd5 in do_printf (tree=0x3ba8c) at builtin.c:844
+# #8 0x9271 in interpret (tree=0x3ba8c) at eval.c:465
+# #9 0x8ca3 in interpret (tree=0x3bbd0) at eval.c:308
+# #10 0x8c34 in interpret (tree=0x3bc18) at eval.c:292
+# #11 0xf069 in do_input () at io.c:312
+# #12 0x12ba9 in main (argc=3, argv=0xefbfd538) at main.c:393
+# (gdb) l
+# 386 *buf += len;
+# 387 return nf;
+# 388 }
+# 389
+# 390 /* before doing anything save the char at *end */
+# 391 sav = *end;
+# 392 /* because it will be destroyed now: */
+# 393
+# 394 *end = ' '; /* sentinel character */
+# 395 for (; nf < up_to; scan++) {
+# (gdb) print end
+# $1 = 0x804d006 <Error reading address 0x804d006: No such file or directory>
+# (gdb) print buf
+# $2 = (char **) 0x37704
+# (gdb) print *buf
+# $3 = 0x804d000 <Error reading address 0x804d000: No such file or directory>
+# (gdb) q
+# The program is running. Quit anyway (and kill it)? (y or n) y
+# bash$ exit
+#
+# Script done on Mon Jan 20 15:11:07 1997
+#
+# Dave Bodenstab
+# imdave@synet.net
diff --git a/contrib/awk/test/clobber.ok b/contrib/awk/test/clobber.ok
new file mode 100644
index 0000000..7105708
--- /dev/null
+++ b/contrib/awk/test/clobber.ok
@@ -0,0 +1 @@
+000801 \ No newline at end of file
diff --git a/contrib/awk/test/clsflnam.awk b/contrib/awk/test/clsflnam.awk
new file mode 100644
index 0000000..5392891
--- /dev/null
+++ b/contrib/awk/test/clsflnam.awk
@@ -0,0 +1,12 @@
+#! /usr/bin/awk -f
+BEGIN {
+ getline
+# print ("FILENAME =", FILENAME) > "/dev/stderr"
+ #Rewind the file
+ if (close(FILENAME)) {
+ print "Error " ERRNO " closing input file" > "/dev/stderr";
+ exit;
+ }
+}
+{ print "Analysing ", $0 }
+
diff --git a/contrib/awk/test/clsflnam.in b/contrib/awk/test/clsflnam.in
new file mode 100644
index 0000000..a92d664
--- /dev/null
+++ b/contrib/awk/test/clsflnam.in
@@ -0,0 +1,3 @@
+line 1
+line 2
+line 3
diff --git a/contrib/awk/test/clsflnam.ok b/contrib/awk/test/clsflnam.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/clsflnam.ok
diff --git a/contrib/awk/test/compare.awk b/contrib/awk/test/compare.awk
new file mode 100644
index 0000000..39a88f3
--- /dev/null
+++ b/contrib/awk/test/compare.awk
@@ -0,0 +1,13 @@
+BEGIN {
+ if (ARGV[1]) print 1
+ ARGV[1] = ""
+ if (ARGV[2]) print 2
+ ARGV[2] = ""
+ if ("0") print "zero"
+ if ("") print "null"
+ if (0) print 0
+}
+{
+ if ($0) print $0
+ if ($1) print $1
+}
diff --git a/contrib/awk/test/compare.in b/contrib/awk/test/compare.in
new file mode 100644
index 0000000..1ab098b
--- /dev/null
+++ b/contrib/awk/test/compare.in
@@ -0,0 +1,4 @@
+0
+1
+0 1
+
diff --git a/contrib/awk/test/compare.ok b/contrib/awk/test/compare.ok
new file mode 100644
index 0000000..8241359
--- /dev/null
+++ b/contrib/awk/test/compare.ok
@@ -0,0 +1,5 @@
+2
+zero
+1
+1
+0 1
diff --git a/contrib/awk/test/convfmt.awk b/contrib/awk/test/convfmt.awk
new file mode 100644
index 0000000..90fd204
--- /dev/null
+++ b/contrib/awk/test/convfmt.awk
@@ -0,0 +1,10 @@
+BEGIN {
+ CONVFMT = "%2.2f"
+ a = 123.456
+ b = a "" # give `a' string value also
+ printf "a = %s\n", a
+ CONVFMT = "%.6g"
+ printf "a = %s\n", a
+ a += 0 # make `a' numeric only again
+ printf "a = %s\n", a # use `a' as string
+}
diff --git a/contrib/awk/test/convfmt.ok b/contrib/awk/test/convfmt.ok
new file mode 100644
index 0000000..a7b66f7
--- /dev/null
+++ b/contrib/awk/test/convfmt.ok
@@ -0,0 +1,3 @@
+a = 123.46
+a = 123.456
+a = 123.456
diff --git a/contrib/awk/test/defref.awk b/contrib/awk/test/defref.awk
new file mode 100644
index 0000000..b4e8f10
--- /dev/null
+++ b/contrib/awk/test/defref.awk
@@ -0,0 +1 @@
+BEGIN { foo() }
diff --git a/contrib/awk/test/defref.ok b/contrib/awk/test/defref.ok
new file mode 100644
index 0000000..f833c96
--- /dev/null
+++ b/contrib/awk/test/defref.ok
@@ -0,0 +1,2 @@
+gawk: defref.awk:2: warning: function `foo' called but never defined
+gawk: defref.awk:1: fatal: function `foo' not defined
diff --git a/contrib/awk/test/delarprm.awk b/contrib/awk/test/delarprm.awk
new file mode 100644
index 0000000..d59de31
--- /dev/null
+++ b/contrib/awk/test/delarprm.awk
@@ -0,0 +1,50 @@
+# From dragon!unagi.cis.upenn.edu!sjanet Tue Mar 25 17:12:20 1997
+# Return-Path: <dragon!unagi.cis.upenn.edu!sjanet>
+# Received: by skeeve.atl.ga.us (/\==/\ Smail3.1.22.1 #22.1)
+# id <m0w9eS4-000GWyC@skeeve.atl.ga.us>; Tue, 25 Mar 97 17:12 EST
+# Received: by vecnet.com (DECUS UUCP /2.0/2.0/2.0/);
+# Tue, 25 Mar 97 16:58:36 EDT
+# Received: from gnu-life.ai.mit.edu by antaries.vec.net (MX V4.2 VAX) with SMTP;
+# Tue, 25 Mar 1997 16:58:26 EST
+# Received: from linc.cis.upenn.edu by gnu-life.ai.mit.edu (8.8.5/8.6.12GNU) with
+# ESMTP id QAA24350 for <bug-gnu-utils@prep.ai.mit.edu>; Tue, 25 Mar
+# 1997 16:56:59 -0500 (EST)
+# Received: from unagi.cis.upenn.edu (UNAGI.CIS.UPENN.EDU [158.130.8.153]) by
+# linc.cis.upenn.edu (8.8.5/8.8.5) with ESMTP id QAA09424; Tue, 25 Mar
+# 1997 16:56:54 -0500 (EST)
+# Received: (from sjanet@localhost) by unagi.cis.upenn.edu (8.8.5/8.8.5) id
+# QAA03969; Tue, 25 Mar 1997 16:56:50 -0500 (EST)
+# Date: Tue, 25 Mar 1997 16:56:50 -0500 (EST)
+# From: Stan Janet <sjanet@unagi.cis.upenn.edu>
+# Message-ID: <199703252156.QAA03969@unagi.cis.upenn.edu>
+# To: bug-gnu-utils@prep.ai.mit.edu
+# CC: arnold@gnu.ai.mit.edu
+# Subject: GNU awk 3.0.2 bug: fatal error deleting local array inside function
+# Status: ORf
+#
+# Version: GNU Awk 3.0.2
+# Platforms: SunOS 4.1.1 (compiled with Sun cc)
+# IRIX 5.3 (compiled with SGI cc)
+# Problem: Deleting local array inside function causes fatal internal error (and
+# core dump). The error does not occur when the variable "x", unused in
+# the example, is removed or when the function is declared foo(x,p).
+# When the function is declared foo(p,x), adding a dummy line that uses
+# "x", e.g. "x=1" does not prevent the error. If "p" is not deleted,
+# there is no error. If "p[1]" is used to delete the lone element, there
+# is no error.
+#
+# ==== The program x.gawk ====
+
+function foo(p,x) {
+ p[1]="bar"
+ delete p
+ return 0
+}
+
+BEGIN {
+ foo()
+}
+
+# ==== The output for "gawk -f x.gawk" (SunOS) ====
+#
+# gawk: x.gawk:4: fatal error: internal error
diff --git a/contrib/awk/test/delarprm.ok b/contrib/awk/test/delarprm.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/delarprm.ok
diff --git a/contrib/awk/test/dynlj.awk b/contrib/awk/test/dynlj.awk
new file mode 100644
index 0000000..ec6851b
--- /dev/null
+++ b/contrib/awk/test/dynlj.awk
@@ -0,0 +1 @@
+BEGIN { printf "%*sworld\n", -20, "hello" }
diff --git a/contrib/awk/test/dynlj.ok b/contrib/awk/test/dynlj.ok
new file mode 100644
index 0000000..c8f3fe9
--- /dev/null
+++ b/contrib/awk/test/dynlj.ok
@@ -0,0 +1 @@
+hello world
diff --git a/contrib/awk/test/eofsplit.awk b/contrib/awk/test/eofsplit.awk
new file mode 100644
index 0000000..22042b4
--- /dev/null
+++ b/contrib/awk/test/eofsplit.awk
@@ -0,0 +1,68 @@
+# Date: Sat, 30 Mar 1996 12:47:17 -0800 (PST)
+# From: Charles Howes <chowes@grid.direct.ca>
+# To: bug-gnu-utils@prep.ai.mit.edu, arnold@gnu.ai.mit.edu
+# Subject: Bug in Gawk 3.0.0, sample code:
+#
+#!/usr/local/bin/gawk -f
+#
+# Hello! This is a bug report from chowes@direct.ca
+#
+# uname -a
+# SunOS hostname 5.5 Generic sun4m
+#
+# Gnu Awk (gawk) 3.0, patchlevel 0:
+BEGIN{
+FS=":"
+while ((getline < "/etc/passwd") > 0) {    # loop until getline stops returning a record (EOF or error)
+ r=$3
+ z=0
+ n[0]=1
+ }
+FS=" "    # FS is reassigned only after the read loop has finished
+}
+#gawk: fp.new:16: fatal error: internal error
+#Abort
+
+# #!/usr/local/bin/gawk -f
+# # Gnu Awk (gawk) 2.15, patchlevel 6
+#
+# BEGIN{
+# f="/etc/passwd"
+# while (getline < f) n[0]=1
+# FS=" "
+# }
+# #gawk: /staff/chowes/bin/fp:7: fatal error: internal error
+# #Abort
+
+# These examples are not perfect coding style because I took a real
+# piece of code and tried to strip away anything that didn't make the error
+# message go away.
+#
+# The interesting part of the 'truss' is:
+#
+# fstat(3, 0xEFFFF278) = 0
+# lseek(3, 0, SEEK_SET) = 0
+# read(3, " r o o t : x : 0 : 1 : S".., 2291) = 2291
+# brk(0x00050020) = 0
+# brk(0x00052020) = 0
+# read(3, 0x0004F4B8, 2291) = 0
+# close(3) = 0
+# Incurred fault #6, FLTBOUNDS %pc = 0x0001B810
+# siginfo: SIGSEGV SEGV_MAPERR addr=0x00053000
+# Received signal #11, SIGSEGV [caught]
+# siginfo: SIGSEGV SEGV_MAPERR addr=0x00053000
+# write(2, " g a w k", 4) = 4
+# write(2, " : ", 2) = 2
+#
+# --
+# Charles Howes -- chowes@direct.ca Voice: (604) 691-1607
+# System Administrator Fax: (604) 691-1605
+# Internet Direct - 1050 - 555 West Hastings St - Vancouver, BC V6B 4N6
+#
+# A sysadmin's life is a sorry one. The only advantage he has over Emergency
+# Room doctors is that malpractice suits are rare. On the other hand, ER
+# doctors never have to deal with patients installing new versions of their
+# own innards! -Michael O'Brien
+#
+# "I think I know what may have gone wrong in the original s/w.
+# It's a bug in the way it was written." - Vagueness**n
diff --git a/contrib/awk/test/eofsplit.ok b/contrib/awk/test/eofsplit.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/eofsplit.ok
diff --git a/contrib/awk/test/fflush.ok b/contrib/awk/test/fflush.ok
new file mode 100644
index 0000000..4cf0df6
--- /dev/null
+++ b/contrib/awk/test/fflush.ok
@@ -0,0 +1,16 @@
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
diff --git a/contrib/awk/test/fflush.sh b/contrib/awk/test/fflush.sh
new file mode 100755
index 0000000..42d624c
--- /dev/null
+++ b/contrib/awk/test/fflush.sh
@@ -0,0 +1,16 @@
+#! /bin/sh
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat"}'
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat"}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");close("/dev/stdout");print "2nd"|"cat"}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"sort"}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"sort";close("sort")}'|cat
diff --git a/contrib/awk/test/fieldwdth.ok b/contrib/awk/test/fieldwdth.ok
new file mode 100644
index 0000000..51b4008
--- /dev/null
+++ b/contrib/awk/test/fieldwdth.ok
@@ -0,0 +1 @@
+345
diff --git a/contrib/awk/test/fldchg.awk b/contrib/awk/test/fldchg.awk
new file mode 100644
index 0000000..a8018f7
--- /dev/null
+++ b/contrib/awk/test/fldchg.awk
@@ -0,0 +1,8 @@
+{
+# print "0:", $0
+ gsub("aa", "+")    # no target argument, so the substitution is applied to $0
+ print "1:", $0
+ $3 = "<" $3 ">"    # assign to a field after $0 was changed by gsub
+ print "2:", $0
+ print "2a:" "%" $1 "%" $2 "%" $3 "%" $4 "%" $5
+}
diff --git a/contrib/awk/test/fldchg.in b/contrib/awk/test/fldchg.in
new file mode 100644
index 0000000..f500c36
--- /dev/null
+++ b/contrib/awk/test/fldchg.in
@@ -0,0 +1 @@
+aa aab c d e f
diff --git a/contrib/awk/test/fldchg.ok b/contrib/awk/test/fldchg.ok
new file mode 100644
index 0000000..cc5032a
--- /dev/null
+++ b/contrib/awk/test/fldchg.ok
@@ -0,0 +1,3 @@
+1: + +b c d e f
+2: + +b <c> d e f
+2a:%+%+b%<c>%d%e
diff --git a/contrib/awk/test/fldchgnf.awk b/contrib/awk/test/fldchgnf.awk
new file mode 100644
index 0000000..fbb8f11
--- /dev/null
+++ b/contrib/awk/test/fldchgnf.awk
@@ -0,0 +1 @@
+{ OFS = ":"; $2 = ""; print $0; print NF }    # empty $2, then print the record and NF; fldchgnf.ok expects "a::c:d" and 4
diff --git a/contrib/awk/test/fldchgnf.in b/contrib/awk/test/fldchgnf.in
new file mode 100644
index 0000000..8e13e46
--- /dev/null
+++ b/contrib/awk/test/fldchgnf.in
@@ -0,0 +1 @@
+a b c d
diff --git a/contrib/awk/test/fldchgnf.ok b/contrib/awk/test/fldchgnf.ok
new file mode 100644
index 0000000..10b38ed
--- /dev/null
+++ b/contrib/awk/test/fldchgnf.ok
@@ -0,0 +1,2 @@
+a::c:d
+4
diff --git a/contrib/awk/test/fnarray.awk b/contrib/awk/test/fnarray.awk
new file mode 100644
index 0000000..92a18b9
--- /dev/null
+++ b/contrib/awk/test/fnarray.awk
@@ -0,0 +1,7 @@
+function foo(N) {
+ return 0
+}
+BEGIN {
+ Num = foo[c]    # deliberate error: subscripts the function name; fnarray.ok expects a fatal diagnostic for this line
+}
+
diff --git a/contrib/awk/test/fnarray.ok b/contrib/awk/test/fnarray.ok
new file mode 100644
index 0000000..94beacd
--- /dev/null
+++ b/contrib/awk/test/fnarray.ok
@@ -0,0 +1 @@
+gawk: fnarray.awk:5: fatal: attempt to use function `foo' as array
diff --git a/contrib/awk/test/fnarydel.awk b/contrib/awk/test/fnarydel.awk
new file mode 100644
index 0000000..8a1264c
--- /dev/null
+++ b/contrib/awk/test/fnarydel.awk
@@ -0,0 +1,60 @@
+#!/usr/local/bin/gawk -f
+BEGIN {
+ process()    # called with no arguments, before the function's definition appears in the file
+}
+
+function process(aa,a) {    # both parameters are unset when called from BEGIN
+ delete aa    # whole-array delete of a parameter that was never used as an array
+}
+
+BEGIN {
+ for (i = 1; i < 10; i++)    # fill a[1..9]
+ a[i] = i;
+
+ print "first loop"
+ for (i in a)
+ print a[i]
+
+ delete a    # whole-array delete of a global array
+
+ print "second loop"
+ for (i in a)    # fnarydel.ok expects no output from this loop
+ print a[i]
+
+ for (i = 1; i < 10; i++)    # refill the array after the delete
+ a[i] = i;
+
+ print "third loop"
+ for (i in a)
+ print a[i]
+
+ print "call func"
+ delit(a)    # delete the same array through a function parameter
+
+ print "fourth loop"
+ for (i in a)    # fnarydel.ok expects no output from this loop either
+ print a[i]
+
+ stressit()
+}
+
+function delit(arr)    # delete every element of the array passed as arr
+{
+ delete arr
+}
+
+function stressit( array, i)    # called with no arguments, so array and i are locals
+{
+ delete array    # delete before the local has ever been used
+ array[4] = 4
+ array[5] = 5
+ delete array[5]    # single-element delete; only array[4] remains
+ print "You should just see: 4 4"
+ for (i in array)
+ print i, array[i]
+ delete array    # whole-array delete of the now-populated local
+ print "You should see nothing between this line"
+ for (i in array)
+ print i, array[i]
+ print "And this one"
+}
diff --git a/contrib/awk/test/fnarydel.ok b/contrib/awk/test/fnarydel.ok
new file mode 100644
index 0000000..7f3e453
--- /dev/null
+++ b/contrib/awk/test/fnarydel.ok
@@ -0,0 +1,27 @@
+first loop
+4
+5
+6
+7
+8
+9
+1
+2
+3
+second loop
+third loop
+4
+5
+6
+7
+8
+9
+1
+2
+3
+call func
+fourth loop
+You should just see: 4 4
+4 4
+You should see nothing between this line
+And this one
diff --git a/contrib/awk/test/fsbs.in b/contrib/awk/test/fsbs.in
new file mode 100644
index 0000000..0a102c3
--- /dev/null
+++ b/contrib/awk/test/fsbs.in
@@ -0,0 +1 @@
+1\2
diff --git a/contrib/awk/test/fsbs.ok b/contrib/awk/test/fsbs.ok
new file mode 100644
index 0000000..8d04f96
--- /dev/null
+++ b/contrib/awk/test/fsbs.ok
@@ -0,0 +1 @@
+1 2
diff --git a/contrib/awk/test/fsrs.awk b/contrib/awk/test/fsrs.awk
new file mode 100644
index 0000000..a001489
--- /dev/null
+++ b/contrib/awk/test/fsrs.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ RS=""; FS="\n";    # blank-line separated records, one field per input line
+ ORS=""; OFS="\n";    # no separator is written between output records
+ }
+{
+ split ($2,f," ")    # the result in f is never used; the split itself is what is exercised before $0 is printed
+ print $0;
+}
diff --git a/contrib/awk/test/fsrs.in b/contrib/awk/test/fsrs.in
new file mode 100644
index 0000000..4b49d81
--- /dev/null
+++ b/contrib/awk/test/fsrs.in
@@ -0,0 +1,7 @@
+a b
+c d
+e f
+
+1 2
+3 4
+5 6
diff --git a/contrib/awk/test/fsrs.ok b/contrib/awk/test/fsrs.ok
new file mode 100644
index 0000000..7dafd65
--- /dev/null
+++ b/contrib/awk/test/fsrs.ok
@@ -0,0 +1,5 @@
+a b
+c d
+e f1 2
+3 4
+5 6
\ No newline at end of file
diff --git a/contrib/awk/test/fstabplus.awk b/contrib/awk/test/fstabplus.awk
new file mode 100644
index 0000000..748a44f
--- /dev/null
+++ b/contrib/awk/test/fstabplus.awk
@@ -0,0 +1,2 @@
+BEGIN { FS = "\t+" }    # field separator: a run of one or more tabs
+ { print $1, $2 }
diff --git a/contrib/awk/test/fstabplus.ok b/contrib/awk/test/fstabplus.ok
new file mode 100644
index 0000000..8d04f96
--- /dev/null
+++ b/contrib/awk/test/fstabplus.ok
@@ -0,0 +1 @@
+1 2
diff --git a/contrib/awk/test/funstack.awk b/contrib/awk/test/funstack.awk
new file mode 100644
index 0000000..ab85b45
--- /dev/null
+++ b/contrib/awk/test/funstack.awk
@@ -0,0 +1,977 @@
+### ====================================================================
+### @Awk-file{
+### author = "Nelson H. F. Beebe",
+### version = "1.00",
+### date = "09 October 1996",
+### time = "15:57:06 MDT",
+### filename = "journal-toc.awk",
+### address = "Center for Scientific Computing
+### Department of Mathematics
+### University of Utah
+### Salt Lake City, UT 84112
+### USA",
+### telephone = "+1 801 581 5254",
+### FAX = "+1 801 581 4148",
+### URL = "http://www.math.utah.edu/~beebe",
+### checksum = "25092 977 3357 26493",
+### email = "beebe@math.utah.edu (Internet)",
+### codetable = "ISO/ASCII",
+### keywords = "BibTeX, bibliography, HTML, journal table of
+### contents",
+### supported = "yes",
+### docstring = "Create a journal cover table of contents from
+### <at>Article{...} entries in a journal BibTeX
+### .bib file for checking the bibliography
+### database against the actual journal covers.
+### The output can be either plain text, or HTML.
+###
+### Usage:
+### bibclean -max-width 0 BibTeX-file(s) | \
+### bibsort -byvolume | \
+### awk -f journal-toc.awk \
+### [-v HTML=nnn] [-v INDENT=nnn] \
+### [-v BIBFILEURL=url] >foo.toc
+###
+### or if the bibliography is already sorted
+### by volume,
+###
+### bibclean -max-width 0 BibTeX-file(s) | \
+### awk -f journal-toc.awk \
+### [-v HTML=nnn] [-v INDENT=nnn] \
+### [-v BIBFILEURL=url] >foo.toc
+###
+### A non-zero value of the command-line option,
+### HTML=nnn, results in HTML output instead of
+### the default plain ASCII text (corresponding
+### to HTML=0).
+###
+### The INDENT=nnn command-line option specifies
+### the number of blanks to indent each logical
+### level of HTML. The default is INDENT=4.
+### INDENT=0 suppresses indentation. The INDENT
+### option has no effect when the default HTML=0
+### (plain text output) option is in effect.
+###
+### When HTML output is selected, the
+### BIBFILEURL=url command-line option provides a
+### way to request hypertext links from table of
+### contents page numbers to the complete BibTeX
+### entry for the article. These links are
+### created by appending a sharp (#) and the
+### citation label to the BIBFILEURL value, which
+### conforms with the practice of
+### bibtex-to-html.awk.
+###
+### The HTML output form may be useful as a more
+### compact representation of journal article
+### bibliography data than the original BibTeX
+### file provides. Of course, the
+### table-of-contents format provides less
+### information, and is considerably more
+### troublesome for a computer program to parse.
+###
+### When URL key values are provided, they will
+### be used to create hypertext links around
+### article titles. This supports journals that
+### provide article contents on the World-Wide
+### Web.
+###
+### For parsing simplicity, this program requires
+### that BibTeX
+###
+### key = "value"
+###
+### and
+###
+### @String{name = "value"}
+###
+### specifications be entirely contained on
+### single lines, which is readily provided by
+### the `bibclean -max-width 0' filter. It also
+### requires that bibliography entries begin and
+### end at the start of a line, and that
+### quotation marks, rather than balanced braces,
+### delimit string values. This is a
+### conventional format that again can be
+### guaranteed by bibclean.
+###
+### This program requires `new' awk, as described
+### in the book
+###
+### Alfred V. Aho, Brian W. Kernighan, and
+### Peter J. Weinberger,
+### ``The AWK Programming Language'',
+### Addison-Wesley (1988), ISBN
+### 0-201-07981-X,
+###
+### such as provided by programs named (GNU)
+### gawk, nawk, and recent AT&T awk.
+###
+### The checksum field above contains a CRC-16
+### checksum as the first value, followed by the
+### equivalent of the standard UNIX wc (word
+### count) utility output of lines, words, and
+### characters. This is produced by Robert
+### Solovay's checksum utility.",
+### }
+### ====================================================================
+
+BEGIN { initialize() }
+
+/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *{/ { do_String(); next }    # @String{name = "value"} definition
+
+/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/ { next }    # @Preamble lines are skipped
+
+/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/ { do_Article(); next }
+
+/^ *@/ { do_Other(); next }    # any other @entry: leave article mode
+
+/^ *author *= *\"/ { do_author(); next }
+
+/^ *journal *= */ { do_journal(); next }    # no opening quote required: the value may be an abbreviation
+
+/^ *volume *= *\"/ { do_volume(); next }
+
+/^ *number *= *\"/ { do_number(); next }
+
+/^ *year *= *\"/ { do_year(); next }
+
+/^ *month *= */ { do_month(); next }    # no opening quote required: the value may be an abbreviation
+
+/^ *title *= *\"/ { do_title(); next }
+
+/^ *pages *= *\"/ { do_pages(); next }
+
+/^ *URL *= *\"/ { do_URL(); next }
+
+/^ *} *$/ { if (In_Article) do_end_entry(); next }    # a lone closing brace ends the current entry
+
+END { terminate() }
+
+
+########################################################################
+# NB: The programming conventions for variables in this program are: #
+# UPPERCASE global constants and user options #
+# Initialuppercase global variables #
+# lowercase local variables #
+# Any deviation is an error! #
+########################################################################
+
+
+function do_Article()    # start of an @Article entry: record its citation label and reset the per-entry fields
+{
+ In_Article = 1
+
+ Citation_label = $0
+ sub(/^[^\{]*{/,"",Citation_label)    # drop everything up to and including the first "{"
+ sub(/ *, *$/,"",Citation_label)    # drop the trailing comma and surrounding blanks
+
+ Author = ""
+ Title = ""
+ Journal = ""
+ Volume = ""
+ Number = ""
+ Month = ""
+ Year = ""
+ Pages = ""
+ Url = ""
+}
+
+
+function do_author()    # set Author from the quoted value on the current line, converted by TeX_to_HTML()
+{
+ Author = TeX_to_HTML(get_value($0))
+}
+
+
+function do_end_entry( k,n,parts)    # emit the table-of-contents lines for the finished entry; k, n, parts are locals
+{
+ n = split(Author,parts," and ")
+ if (Last_number != Number)    # issue number changed: print a new issue heading first
+ do_new_issue()
+ for (k = 1; k < n; ++k)    # every author except the last gets a line of its own
+ print_toc_line(parts[k] " and", "", "")
+ Title_prefix = html_begin_title()
+ Title_suffix = html_end_title()
+ if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line
+ print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages())
+ else # need to split long title over multiple lines
+ do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages())
+}
+
+
+function do_journal()    # set Journal from a quoted name or from a @String abbreviation
+{
+ if ($0 ~ /[=] *"/) # have journal = "quoted journal name",
+ Journal = get_value($0)
+ else # have journal = journal-abbreviation,
+ {
+ Journal = get_abbrev($0)
+ if (Journal in String) # replace abbrev by its expansion
+ Journal = String[Journal]
+ }
+ gsub(/\\-/,"",Journal) # remove discretionary hyphens
+}
+
+
+function do_long_title(author,title,pages, last_title,n)    # print a title over several lines; last_title and n are locals
+{
+ title = trim(title) # discard leading and trailing space
+ while (length(title) > 0)
+ {
+ n = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
+ last_title = substr(title,1,n)    # the piece printed on this line
+ title = substr(title,n+1)    # what is left for the following lines
+ sub(/^ +/,"",title) # discard any leading space
+ print_toc_line(author, last_title, (length(title) == 0) ? pages : "")    # pages only on the final line
+ author = ""    # author only on the first line
+ }
+}
+
+
+function do_month( k,n,parts)    # set Month from the current line, expanding abbreviations found in Month_expansion
+{
+ Month = ($0 ~ /[=] *"/) ? get_value($0) : get_abbrev($0)
+ gsub(/[\"]/,"",Month)    # drop any remaining double quotes
+ gsub(/ *# *\\slash *# */," / ",Month)    # a "#"-joined \slash becomes " / "
+ gsub(/ *# *-+ *# */," / ",Month)    # a "#"-joined run of hyphens also becomes " / "
+ n = split(Month,parts," */ *")
+ Month = ""
+ for (k = 1; k <= n; ++k)    # rebuild Month from its "/"-separated parts
+ Month = Month ((k > 1) ? " / " : "") \
+ ((parts[k] in Month_expansion) ? Month_expansion[parts[k]] : parts[k])
+}
+
+
+function do_new_issue()    # print the heading for a new journal issue and, in HTML mode, record its TOC entry
+{
+ Last_number = Number
+ if (HTML)
+ {
+ if (Last_volume != Volume)    # extra line break when the volume changes
+ {
+ Last_volume = Volume
+ print_line(prefix(2) "<BR>")
+ }
+ html_end_toc()
+ html_begin_issue()
+ print_line(prefix(2) Journal "<BR>")
+ }
+ else
+ {
+ print_line("")
+ print_line(Journal)
+ }
+
+ print_line(strip_html(vol_no_month_year()))    # heading text with any <...> tags removed
+
+ if (HTML)
+ {
+ html_end_issue()
+ html_toc_entry()
+ html_begin_toc()
+ }
+ else
+ print_line("")
+}
+
+
+function do_number()    # set Number from the quoted value on the current line
+{
+ Number = get_value($0)
+}
+
+
+function do_Other()    # a non-article entry starts: clear In_Article so its closing brace is not treated as the end of an article
+{
+ In_Article = 0
+}
+
+
+function do_pages()    # set Pages from the quoted value on the current line
+{
+ Pages = get_value($0)
+ sub(/--[?][?]/,"",Pages)    # drop a "--??" placeholder
+}
+
+
+function do_String()    # record a @String{name = "value"} definition in String[name]
+{
+ sub(/^[^\{]*\{/,"",$0) # discard up to and including open brace
+ sub(/\} *$/,"",$0) # discard from optional whitespace and trailing brace to end of line
+ String[get_key($0)] = get_value($0)
+}
+
+
+function do_title()    # set Title from the quoted value on the current line, converted by TeX_to_HTML()
+{
+ Title = TeX_to_HTML(get_value($0))
+}
+
+
+function do_URL( parts)    # set Url to the first URL given on the current line; parts is a local
+{
+ Url = get_value($0)
+ split(Url,parts,"[,;]") # in case we have multiple URLs
+ Url = trim(parts[1])    # keep only the first one
+}
+
+
+function do_volume()    # set Volume from the quoted value on the current line
+{
+ Volume = get_value($0)
+}
+
+
+function do_year()    # set Year from the quoted value on the current line
+{
+ Year = get_value($0)
+}
+
+
+function get_abbrev(s)
+{ # return abbrev from ``key = abbrev,''
+ sub(/^[^=]*= */,"",s) # discard text up to start of non-blank value
+ sub(/ *,? *$/,"",s) # discard trailing optional whitespace,
+ # optional comma, and optional space
+ return (s)
+}
+
+
+function get_key(s)
+{ # return key from ``key = "value",''
+ sub(/^ */,"",s) # discard leading space
+ sub(/ *=.*$/,"",s) # discard everything after key
+
+ return (s)
+}
+
+
+function get_value(s)
+{ # return value from ``key = "value",''
+ sub(/^[^\"]*\" */,"",s) # discard text up to start of non-blank value
+ sub(/ *\",? *$/,"",s) # discard trailing optional whitespace, quote,
+ # optional comma, and optional space
+ return (s)
+}
+
+
+function html_accents(s)    # return s with braced TeX accent groups, e.g. {\'e}, replaced by SGML entities
+{
+    if (index(s,"\\") > 0) # important optimization: every pattern below contains a backslash
+    {
+        # Convert common lower-case accented letters according to the
+        # table on p. 169 of Peter Flynn's ``The World Wide Web
+        # Handbook'', International Thomson Computer Press, 1995, ISBN
+        # 1-85032-205-8. The official table of ISO Latin 1 SGML
+        # entities used in HTML can be found in the file
+        # /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
+        # may differ).
+        # NB: the TeX grave accent is matched as \\` (escaped backslash, then a plain backquote).
+        gsub(/{\\`a}/, "\\&agrave;", s)
+        gsub(/{\\'a}/, "\\&aacute;", s)
+        gsub(/{\\[\^]a}/,"\\&acirc;", s)
+        gsub(/{\\~a}/, "\\&atilde;", s)
+        gsub(/{\\\"a}/, "\\&auml;", s)
+        gsub(/{\\aa}/, "\\&aring;", s)
+        gsub(/{\\ae}/, "\\&aelig;", s)
+
+        gsub(/{\\c{c}}/,"\\&ccedil;", s)
+
+        gsub(/{\\`e}/, "\\&egrave;", s)
+        gsub(/{\\'e}/, "\\&eacute;", s)
+        gsub(/{\\[\^]e}/,"\\&ecirc;", s)
+        gsub(/{\\\"e}/, "\\&euml;", s)
+
+        gsub(/{\\`i}/, "\\&igrave;", s)
+        gsub(/{\\'i}/, "\\&iacute;", s)
+        gsub(/{\\[\^]i}/,"\\&icirc;", s)
+        gsub(/{\\\"i}/, "\\&iuml;", s)
+
+        # ignore eth and thorn
+
+        gsub(/{\\~n}/, "\\&ntilde;", s)
+
+        gsub(/{\\`o}/, "\\&ograve;", s)
+        gsub(/{\\'o}/, "\\&oacute;", s)
+        gsub(/{\\[\^]o}/, "\\&ocirc;", s)
+        gsub(/{\\~o}/, "\\&otilde;", s)
+        gsub(/{\\\"o}/, "\\&ouml;", s)
+        gsub(/{\\o}/, "\\&oslash;", s)
+
+        gsub(/{\\`u}/, "\\&ugrave;", s)
+        gsub(/{\\'u}/, "\\&uacute;", s)
+        gsub(/{\\[\^]u}/,"\\&ucirc;", s)
+        gsub(/{\\\"u}/, "\\&uuml;", s)
+
+        gsub(/{\\'y}/, "\\&yacute;", s)
+        gsub(/{\\\"y}/, "\\&yuml;", s)
+
+        # Now do the same for upper-case accents
+
+        gsub(/{\\`A}/, "\\&Agrave;", s)
+        gsub(/{\\'A}/, "\\&Aacute;", s)
+        gsub(/{\\[\^]A}/, "\\&Acirc;", s)
+        gsub(/{\\~A}/, "\\&Atilde;", s)
+        gsub(/{\\\"A}/, "\\&Auml;", s)
+        gsub(/{\\AA}/, "\\&Aring;", s)
+        gsub(/{\\AE}/, "\\&AElig;", s)
+
+        gsub(/{\\c{C}}/,"\\&Ccedil;", s)
+
+        gsub(/{\\`E}/, "\\&Egrave;", s)
+        gsub(/{\\'E}/, "\\&Eacute;", s)
+        gsub(/{\\[\^]E}/, "\\&Ecirc;", s)
+        gsub(/{\\\"E}/, "\\&Euml;", s)
+
+        gsub(/{\\`I}/, "\\&Igrave;", s)
+        gsub(/{\\'I}/, "\\&Iacute;", s)
+        gsub(/{\\[\^]I}/, "\\&Icirc;", s)
+        gsub(/{\\\"I}/, "\\&Iuml;", s)
+
+        # ignore eth and thorn
+
+        gsub(/{\\~N}/, "\\&Ntilde;", s)
+
+        gsub(/{\\`O}/, "\\&Ograve;", s)
+        gsub(/{\\'O}/, "\\&Oacute;", s)
+        gsub(/{\\[\^]O}/, "\\&Ocirc;", s)
+        gsub(/{\\~O}/, "\\&Otilde;", s)
+        gsub(/{\\\"O}/, "\\&Ouml;", s)
+        gsub(/{\\O}/, "\\&Oslash;", s)
+
+        gsub(/{\\`U}/, "\\&Ugrave;", s)
+        gsub(/{\\'U}/, "\\&Uacute;", s)
+        gsub(/{\\[\^]U}/, "\\&Ucirc;", s)
+        gsub(/{\\\"U}/, "\\&Uuml;", s)
+
+        gsub(/{\\'Y}/, "\\&Yacute;", s)
+
+        gsub(/{\\ss}/, "\\&szlig;", s)
+
+        # Others not mentioned in Flynn's book
+        gsub(/{\\'\\i}/,"\\&iacute;", s)
+        gsub(/{\\'\\j}/,"j", s)
+    }
+    return (s)
+}
+
+
+function html_begin_issue()    # open the <H1> heading and named anchor for an issue; html_end_issue() closes them
+{
+ print_line("")
+ print_line(prefix(2) "<HR>")
+ print_line("")
+ print_line(prefix(2) "<H1>")
+ print_line(prefix(3) "<A NAME=\"" html_label() "\">")
+}
+
+
+function html_begin_pages()    # opening link tag to BIBFILEURL#Citation_label, or "" unless HTML is on and BIBFILEURL is set
+{
+ return ((HTML && (BIBFILEURL != "")) ? ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">") : "")
+}
+
+
+function html_begin_pre()    # emit <PRE> and set In_PRE, which makes prefix() return ""
+{
+ In_PRE = 1
+ print_line("<PRE>")
+}
+
+
+function html_begin_title()    # opening link tag to Url, or "" unless HTML is on and Url is set
+{
+ return ((HTML && (Url != "")) ? ("<A HREF=\"" Url "\">") : "")
+}
+
+
+function html_begin_toc()    # close any open <PRE> section, then open a new one
+{
+ html_end_toc()
+ html_begin_pre()
+}
+
+
+function html_body( k)    # print the lines buffered in Body[1..BodyLines]; k is a local
+{
+ for (k = 1; k <= BodyLines; ++k)
+ print Body[k]
+}
+
+function html_breakpoint(title,maxlength, break_after,k)    # break_after and k are locals
+{
+ # Return the largest character position in title AFTER which we
+ # can break the title across lines, without exceeding maxlength
+ # visible characters.
+ if (html_length(title) > maxlength) # then need to split title across lines
+ {
+ # In the presence of HTML markup, the initialization of
+ # k here is complicated, because we need to advance it
+ # until html_length(title) is at least maxlength,
+ # without invoking the expensive html_length() function
+ # too frequently. The need to split the title makes the
+ # alternative of delayed insertion of HTML markup much
+ # more complicated.
+ break_after = 0
+ for (k = min(maxlength,length(title)); k < length(title); ++k)    # forward scan starting at raw position maxlength
+ {
+ if (substr(title,k+1,1) == " ")
+ { # could break after position k
+ if (html_length(substr(title,1,k)) <= maxlength)
+ break_after = k
+ else # advanced too far, retreat back to last break_after
+ break
+ }
+ }
+ if (break_after == 0) # no breakpoint found by forward scan
+ { # so switch to backward scan
+ for (k = min(maxlength,length(title)) - 1; \
+ (k > 0) && (substr(title,k+1,1) != " "); --k)
+ ; # find space at which to break title
+ if (k < 1) # no break point found
+ k = length(title) # so must print entire string
+ }
+ else
+ k = break_after
+ }
+ else # title fits on one line
+ k = length(title)
+ return (k)
+}
+
+
+
+function html_end_issue()    # close the anchor and <H1> opened by html_begin_issue()
+{
+ print_line(prefix(3) "</A>")
+ print_line(prefix(2) "</H1>")
+}
+
+
+function html_end_pages()    # closing tag for html_begin_pages(), under the same condition
+{
+ return ((HTML && (BIBFILEURL != "")) ? "</A>" : "")
+}
+
+
+function html_end_pre()    # emit </PRE> and clear In_PRE, but only if a <PRE> section is open
+{
+ if (In_PRE)
+ {
+ print_line("</PRE>")
+ In_PRE = 0
+ }
+}
+
+
+function html_end_title()    # closing tag for html_begin_title(), under the same condition
+{
+ return ((HTML && (Url != "")) ? "</A>" : "")
+}
+
+
+function html_end_toc()    # end the current table-of-contents section; currently just html_end_pre()
+{
+ html_end_pre()
+}
+
+
+function html_fonts(s, arg,control_word,k,level,n,open_brace)    # rewrite TeX font groups per Font_decl_map / Font_cmd_map; names after s are locals
+{
+ open_brace = index(s,"{")    # only the first brace group of s is examined here
+ if (open_brace > 0) # important optimization
+ {
+ level = 1
+ for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)    # find the matching close brace
+ {
+ if (substr(s,k,1) == "{")
+ level++
+ else if (substr(s,k,1) == "}")
+ level--
+ }
+
+ # {...} is now found at open_brace ... (k-1)
+ for (control_word in Font_decl_map) # look for {\xxx ...}
+ {
+ if (substr(s,open_brace+1,length(control_word)+1) ~ \
+ ("\\" control_word "[^A-Za-z]"))
+ {
+ n = open_brace + 1 + length(control_word)
+ arg = trim(substr(s,n,k - n))
+ if (Font_decl_map[control_word] == "toupper") # arg -> ARG
+ arg = toupper(arg)
+ else if (Font_decl_map[control_word] != "") # arg -> <TAG>arg</TAG>
+ arg = "<" Font_decl_map[control_word] ">" arg "</" Font_decl_map[control_word] ">"
+ return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))    # recurse on the text after the group
+ }
+ }
+ for (control_word in Font_cmd_map) # look for \xxx{...}
+ {
+ if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
+ ("\\" control_word))
+ {
+ n = open_brace + 1
+ arg = trim(substr(s,n,k - n))
+ if (Font_cmd_map[control_word] == "toupper") # arg -> ARG
+ arg = toupper(arg)
+ else if (Font_cmd_map[control_word] != "") # arg -> <TAG>arg</TAG>
+ arg = "<" Font_cmd_map[control_word] ">" arg "</" Font_cmd_map[control_word] ">"
+ n = open_brace - length(control_word) - 1
+ return (substr(s,1,n) arg html_fonts(substr(s,k)))    # recurse on the text after the group
+ }
+ }
+ }
+ return (s)    # NOTE(review): reached when the first group has no known font command; later groups in s are then left untranslated - confirm this is intended
+}
+
+
+function html_header()    # print the HTML prologue and <HEAD>, then open <BODY>
+{
+ USER = ENVIRON["USER"]
+ if (USER == "")
+ USER = ENVIRON["LOGNAME"]
+ if (USER == "")
+ USER = "????"
+ "hostname" | getline HOSTNAME    # NOTE(review): none of the command pipes in this function is close()d
+ "date" | getline DATE
+ ("ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'") | getline PERSONAL_NAME    # NOTE(review): USER comes from the environment and is pasted into a shell command unescaped
+ if (PERSONAL_NAME == "")    # nothing came back from ypcat: fall back to /etc/passwd
+ ("grep '^" USER ":' /etc/passwd | awk -F: '{print $5}'") | getline PERSONAL_NAME
+
+
+ print "<!-- WARNING: Do NOT edit this file. It was converted from -->"
+ print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
+ print "<!-- on " DATE " -->"
+ print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
+ print ""
+ print ""
+ print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
+ print ""
+ print "<HTML>"
+ print prefix(1) "<HEAD>"
+ print prefix(2) "<TITLE>"
+ print prefix(3) Journal
+ print prefix(2) "</TITLE>"
+ print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
+ print prefix(1) "</HEAD>"
+ print ""
+ print prefix(1) "<BODY>"
+}
+
+
+function html_label( label)    # anchor name for the current issue, built from Volume, Number, Month and Year; label is a local
+{
+ label = Volume "(" Number "):" Month ":" Year
+ gsub(/[^A-Za-z0-9():,;.\/\-]/,"",label)    # keep only letters, digits and the listed punctuation
+ return (label)
+}
+
+
+function html_length(s)
+{ # Return visible length of s, ignoring any HTML markup
+ if (HTML)    # in plain-text mode this is simply length(s)
+ {
+ gsub(/<\/?[^>]*>/,"",s) # remove SGML tags
+ gsub(/&[A-Za-z0-9]+;/,"",s) # remove SGML entities
+ }
+ return (length(s))
+}
+
+
+function html_toc()    # print the issue index accumulated in HTML_TOC under an <H1> heading
+{
+ print prefix(2) "<H1>"
+ print prefix(3) "Table of contents for issues of " Journal
+ print prefix(2) "</H1>"
+ print HTML_TOC
+}
+
+
+function html_toc_entry()    # append a link to the current issue's anchor to HTML_TOC
+{
+ HTML_TOC = HTML_TOC " <A HREF=\"#" html_label() "\">"
+ HTML_TOC = HTML_TOC vol_no_month_year()
+ HTML_TOC = HTML_TOC "</A><BR>" "\n"
+}
+
+
+function html_trailer()    # close any open <PRE>, then print the closing </BODY> and </HTML> tags
+{
+ html_end_pre()
+ print prefix(1) "</BODY>"
+ print "</HTML>"
+}
+
+
+function initialize()    # set version strings, option defaults, layout constants and the month/font lookup tables
+{
+    # NB: Update these when the program changes
+    VERSION_DATE = "[09-Oct-1996]"
+    VERSION_NUMBER = "1.00"
+
+    HTML = (HTML == "") ? 0 : (0 + HTML)    # unset means plain text; otherwise force a number
+
+    if (INDENT == "")
+        INDENT = 4
+
+    if (HTML == 0)
+        INDENT = 0 # indentation suppressed in ASCII mode
+
+    LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."
+
+    MAX_TITLE_CHARS = 36 # 36 produces a 79-char output line when there is
+                         # just an initial page number. If this is
+                         # increased, the LEADERS string may need to be
+                         # lengthened.
+
+    MIN_LEADERS = 4 # Minimum number of characters from LEADERS
+                    # required when leaders are used. The total
+                    # number of characters that can appear in a
+                    # title line is MAX_TITLE_CHARS + MIN_LEADERS.
+                    # Leaders are omitted when the title length is
+                    # between MAX_TITLE_CHARS and this sum.
+
+    MIN_LEADERS_SPACE = "    " # must be at least MIN_LEADERS characters long
+
+    Month_expansion["jan"] = "January"
+    Month_expansion["feb"] = "February"
+    Month_expansion["mar"] = "March"
+    Month_expansion["apr"] = "April"
+    Month_expansion["may"] = "May"
+    Month_expansion["jun"] = "June"
+    Month_expansion["jul"] = "July"
+    Month_expansion["aug"] = "August"
+    Month_expansion["sep"] = "September"
+    Month_expansion["oct"] = "October"
+    Month_expansion["nov"] = "November"
+    Month_expansion["dec"] = "December"
+
+    Font_cmd_map["\\emph"] = "EM"    # \xxx{...} commands: value is an HTML tag, "toupper", or "" for no markup
+    Font_cmd_map["\\textbf"] = "B"
+    Font_cmd_map["\\textit"] = "I"
+    Font_cmd_map["\\textmd"] = ""
+    Font_cmd_map["\\textrm"] = ""
+    Font_cmd_map["\\textsc"] = "toupper"
+    Font_cmd_map["\\textsl"] = "I"
+    Font_cmd_map["\\texttt"] = "TT"    # same tag as \tt and \ttfamily below
+    Font_cmd_map["\\textup"] = ""
+
+    Font_decl_map["\\bf"] = "B"    # {\xxx ...} declarations, same value convention
+    Font_decl_map["\\em"] = "EM"
+    Font_decl_map["\\it"] = "I"
+    Font_decl_map["\\rm"] = ""
+    Font_decl_map["\\sc"] = "toupper"
+    Font_decl_map["\\sf"] = ""
+    Font_decl_map["\\tt"] = "TT"
+    Font_decl_map["\\itshape"] = "I"
+    Font_decl_map["\\upshape"] = ""
+    Font_decl_map["\\slshape"] = "I"
+    Font_decl_map["\\scshape"] = "toupper"
+    Font_decl_map["\\mdseries"] = ""
+    Font_decl_map["\\bfseries"] = "B"
+    Font_decl_map["\\rmfamily"] = ""
+    Font_decl_map["\\sffamily"] = ""
+    Font_decl_map["\\ttfamily"] = "TT"
+}
+
+function min(a,b)    # a when a < b, otherwise b
+{
+    if (a < b) return a; else return b
+}
+
+
+function prefix(level)    # indentation for nesting depth level: INDENT * level blanks, or "" inside <PRE>
+{
+    # Return a prefix of up to 60 blanks
+
+    if (In_PRE)
+        return ("")
+    else
+        return (substr("                                                            ", \
+            1, INDENT * level))
+}
+
+
+function print_line(line)    # output one body line: buffered in Body[] in HTML mode, printed directly otherwise
+{
+ if (HTML) # must buffer in memory so that we can accumulate TOC
+ Body[++BodyLines] = line
+ else
+ print line
+}
+
+
+function print_toc_line(author,title,pages, extra,leaders,n,t)    # format and output one TOC line; names after pages are locals
+{
+ # When we have a multiline title, the hypertext link goes only
+ # on the first line. A multiline hypertext link looks awful
+ # because of long underlines under the leading indentation.
+
+ if (pages == "") # then no leaders needed in title lines other than last one
+ t = sprintf("%31s %s%s%s", author, Title_prefix, title, Title_suffix)
+ else # last title line, with page number
+ {
+ n = html_length(title) # potentially expensive
+ extra = n % 2 # extra space for aligned leader dots
+ if (n <= MAX_TITLE_CHARS) # then need leaders
+ leaders = substr(LEADERS, 1, MAX_TITLE_CHARS + MIN_LEADERS - extra - \
+ min(MAX_TITLE_CHARS,n))
+ else # title (almost) fills line, so no leaders
+ leaders = substr(MIN_LEADERS_SPACE,1, \
+ (MAX_TITLE_CHARS + MIN_LEADERS - extra - n))
+ t = sprintf("%31s %s%s%s%s%s %4s", \
+ author, Title_prefix, title, Title_suffix, \
+ (extra ? " " : ""), leaders, pages)
+ }
+
+ Title_prefix = "" # forget any hypertext
+ Title_suffix = "" # link material
+
+ # Efficiency note: an earlier version accumulated the body in a
+ # single scalar like this: "Body = Body t". Profiling revealed
+ # this statement as the major hot spot, and the change to array
+ # storage made the program more than twice as fast. This
+ # suggests that awk might benefit from an optimization of
+ # "s = s t" that uses realloc() instead of malloc().
+ if (HTML)
+ Body[++BodyLines] = t
+ else
+ print t
+}
+
+
+function protect_SGML_characters(s)    # return s with &, <, > and " replaced by SGML entities
+{
+ gsub(/&/,"\\&amp;",s) # NB: this one MUST be first
+ gsub(/</,"\\&lt;",s)
+ gsub(/>/,"\\&gt;",s)
+ gsub(/\"/,"\\&quot;",s)
+ return (s)
+}
+
+
+function strip_braces(s, k)    # k is declared as a local but never used
+{ # strip non-backslashed braces from s and return the result
+
+ return (strip_char(strip_char(s,"{"),"}"))
+}
+
+
+function strip_char(s,c, k)    # k is a local
+{ # strip non-backslashed instances of c from s, and return the result
+    k = index(s,c)    # position of the first c, or 0 when there is none
+    if (k > 0) # then found the character
+    {
+        if (substr(s,k-1,1) != "\\") # then not backslashed char
+            s = substr(s,1,k-1) strip_char(substr(s,k+1),c) # so remove it (recursively)
+        else # preserve backslashed char, then keep scanning the text after it
+            s = substr(s,1,k) strip_char(substr(s,k+1),c)
+    }
+    return (s)
+}
+
+
+function strip_html(s)    # return s with every <...> or </...> tag removed
+{
+ gsub(/<\/?[^>]*>/,"",s)
+ return (s)
+}
+
+
+function terminate()    # END action: in HTML mode, write the header, issue index, buffered body and trailer
+{
+ if (HTML)
+ {
+ html_end_pre()    # still buffered: HTML is non-zero at this point
+
+ HTML = 0 # NB: stop line buffering
+ html_header()
+ html_toc()
+ html_body()
+ html_trailer()
+ }
+}
+
+
+function TeX_to_HTML(s, k,n,parts)    # convert a TeX field value for output; k, n, parts are locals
+{
+    # First convert three SGML reserved characters to SGML entities
+    if (HTML)
+    {
+        gsub(/>/, "\\&gt;", s)
+        gsub(/</, "\\&lt;", s)
+        gsub(/"/, "\\&quot;", s)
+    }
+    # (TeX \& is handled later, in TeX_to_HTML_math() and TeX_to_HTML_nonmath())
+    gsub(/[$][$]/,"$$$",s) # change display math to triple dollars for split
+    n = split(s,parts,/[$]/)# split into non-math (odd) and math (even) parts
+
+    s = ""
+    for (k = 1; k <= n; ++k) # unbrace non-math part, leaving math mode intact
+        s = s ((k > 1) ? "$" : "") \
+            ((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
+            TeX_to_HTML_math(parts[k]))
+
+    gsub(/[$][$][$]/,"$$",s) # restore display math
+
+    return (s)
+}
+
+
+function TeX_to_HTML_math(s)    # return the math-mode text s with only TeX \& rewritten
+{
+ # Mostly a dummy for now, but HTML 3 could support some math translation
+
+ gsub(/\\&/,"\\&amp;",s) # reduce TeX ampersands to SGML entities
+
+ return (s)
+}
+
+
+function TeX_to_HTML_nonmath(s)    # translate (HTML mode) or discard (plain-text mode) TeX markup outside math
+{
+ if (index(s,"\\") > 0) # important optimization
+ {
+ gsub(/\\slash +/,"/",s) # replace TeX slashes with conventional ones
+ gsub(/ *\\emdash +/," --- ",s) # replace BibNet emdashes with conventional ones
+ gsub(/\\%/,"%",s) # reduce TeX percents to conventional ones
+ gsub(/\\[$]/,"$",s) # reduce TeX dollars to conventional ones
+ gsub(/\\#/,"#",s) # reduce TeX sharps to conventional ones
+
+ if (HTML) # translate TeX markup to HTML
+ {
+ gsub(/\\&/,"\\&amp;",s) # reduce TeX ampersands to SGML entities
+ s = html_accents(s)
+ s = html_fonts(s)
+ }
+ else # plain ASCII text output: discard all TeX markup
+ {
+ gsub(/\\\&/, "\\&", s) # reduce TeX ampersands to conventional ones
+
+ gsub(/\\[a-z][a-z] +/,"",s) # remove TeX font changes
+ gsub(/\\[^A-Za-z]/,"",s) # remove remaining TeX control symbols
+ }
+ }
+ return (s)
+}
+
+
+function trim(s)    # s without its leading and trailing blanks and tabs
+{
+    sub(/^[ \t]+/,"",s)    # anchored at the start, so there is at most one match
+    sub(/[ \t]+$/,"",s)    # anchored at the end, likewise
+    return s
+}
+
+
+function vol_no_month_year()    # issue heading text built from Volume, Number, Month and Year, each passed through wrap()
+{
+ return ("Volume " wrap(Volume) ", Number " wrap(Number) ", " wrap(Month) ", " wrap(Year))
+}
+
+
+function wrap(value)    # value inside <STRONG>...</STRONG> when HTML is non-zero, otherwise value unchanged
+{
+    if (HTML) return ("<STRONG>" value "</STRONG>"); else return value
+}
diff --git a/contrib/awk/test/funstack.in b/contrib/awk/test/funstack.in
new file mode 100644
index 0000000..7a29a25
--- /dev/null
+++ b/contrib/awk/test/funstack.in
@@ -0,0 +1,206 @@
+%%% ====================================================================
+%%% BibTeX-file{
+%%% author = "Nelson H. F. Beebe",
+%%% version = "2.09",
+%%% date = "26 March 1997",
+%%% time = "08:21:19 MST",
+%%% filename = "cacm1970.bib",
+%%% address = "Center for Scientific Computing
+%%% Department of Mathematics
+%%% University of Utah
+%%% Salt Lake City, UT 84112
+%%% USA",
+%%% telephone = "+1 801 581 5254",
+%%% FAX = "+1 801 581 4148",
+%%% checksum = "50673 40670 196033 1787829",
+%%% email = "beebe at math.utah.edu (Internet)",
+%%% codetable = "ISO/ASCII",
+%%% keywords = "bibliography, CACM, Communications of the
+%%% ACM",
+%%% supported = "yes",
+%%% docstring = "This is a bibliography of the journal
+%%% Communications of the ACM, covering
+%%% (incompletely) 1970 -- 1979.
+%%%
+%%% At version 2.09, the year coverage looked
+%%% like this:
+%%%
+%%% 1961 ( 1) 1972 (168) 1983 ( 0)
+%%% 1962 ( 1) 1973 (158) 1984 ( 0)
+%%% 1963 ( 2) 1974 (127) 1985 ( 2)
+%%% 1964 ( 2) 1975 (107) 1986 ( 0)
+%%% 1965 ( 1) 1976 ( 97) 1987 ( 0)
+%%% 1966 ( 2) 1977 (117) 1988 ( 0)
+%%% 1967 ( 1) 1978 (118) 1989 ( 0)
+%%% 1968 ( 1) 1979 ( 78) 1990 ( 2)
+%%% 1969 ( 3) 1980 ( 1) 1991 ( 4)
+%%% 1970 (157) 1981 ( 2) 1992 ( 1)
+%%% 1971 (104) 1982 ( 1)
+%%%
+%%% Article: 1252
+%%% Book: 2
+%%% InProceedings: 1
+%%% Manual: 1
+%%% MastersThesis: 1
+%%% PhdThesis: 1
+%%%
+%%% Total entries: 1258
+%%%
+%%% The size of the original cacm.bib file
+%%% covering 1958--1996 became too large (about
+%%% 4000 entries) for BibTeX and TeX to handle,
+%%% so at version 1.44, it was split into
+%%% cacm1950.bib, cacm1960.bib, cacm1970.bib,
+%%% cacm1980.bib, and cacm1990.bib, each covering
+%%% the decade starting with the year embedded in
+%%% the filename. Version numbers for these
+%%% files begin at 2.00.
+%%%
+%%% Volumes from the 1990s average more than 200
+%%% articles yearly, so a complete bibliography
+%%% for this journal could contain more than 6000
+%%% entries from 1958 to 2000.
+%%%
+%%% These bibliographies also include ACM
+%%% Algorithms 1--492. For Algorithms 493--686,
+%%% including Algorithm 568, published in ACM
+%%% Transactions on Programming Languages and
+%%% Systems (TOPLAS), see the companion
+%%% bibliographies, toms.bib and toplas.bib.
+%%%
+%%% All published Remarks and Corrigenda are
+%%% cross-referenced in both directions, so
+%%% that citing a paper will automatically
+%%% generate citations for those Remarks and
+%%% Corrigenda. Cross-referenced entries are
+%%% duplicated in cacm19*.bib and toms.bib, so
+%%% that each is completely self-contained.
+%%%
+%%% Source code for ACM Algorithms from 380
+%%% onwards, with some omissions, is available
+%%% via the Netlib service at
+%%% http://netlib.ornl.gov/, and
+%%% ftp://netlib.bell-labs.com/netlib/toms.
+%%%
+%%% There is a World Wide Web search facility
+%%% for articles published in this journal from
+%%% 1959 to 1979 at
+%%% http://ciir.cs.umass.edu/cgi-bin/web_query_form/public/cacm2.1.
+%%%
+%%% The initial draft of entries for 1981 --
+%%% 1990 was extracted from the ACM Computing
+%%% Archive CD ROM for the 1980s, with manual
+%%% corrections and additions. Additions were
+%%% then made from all of the bibliographies in
+%%% the TeX User Group collection, from
+%%% bibliographies in the author's personal
+%%% files, from the Compendex database
+%%% (1970--1979), from the IEEE INSPEC database
+%%% (1970--1979), from tables of contents
+%%% information at http://www.acm.org/pubs/cacm/,
+%%% from Zentralblatt fur Mathematik Mathematics
+%%% Abstracts at
+%%% http://www.emis.de/cgi-bin/MATH/, from
+%%% bibliographies at Internet host
+%%% netlib.bell-labs.com, and from the computer
+%%% science bibliography collection on
+%%% ftp.ira.uka.de in /pub/bibliography to which
+%%% many people of have contributed. The
+%%% snapshot of this collection was taken on
+%%% 5-May-1994, and it consists of 441 BibTeX
+%%% files, 2,672,675 lines, 205,289 entries, and
+%%% 6,375 <at>String{} abbreviations, occupying
+%%% 94.8MB of disk space.
+%%%
+%%% Numerous errors in the sources noted above
+%%% have been corrected. Spelling has been
+%%% verified with the UNIX spell and GNU ispell
+%%% programs using the exception dictionary
+%%% stored in the companion file with extension
+%%% .sok.
+%%%
+%%% BibTeX citation tags are uniformly chosen
+%%% as name:year:abbrev, where name is the
+%%% family name of the first author or editor,
+%%% year is a 4-digit number, and abbrev is a
+%%% 3-letter condensation of important title
+%%% words. Citation tags were automatically
+%%% generated by software developed for the
+%%% BibNet Project.
+%%%
+%%% In this bibliography, entries are sorted in
+%%% publication order within each journal,
+%%% using bibsort -byvolume.
+%%%
+%%% The checksum field above contains a CRC-16
+%%% checksum as the first value, followed by the
+%%% equivalent of the standard UNIX wc (word
+%%% count) utility output of lines, words, and
+%%% characters. This is produced by Robert
+%%% Solovay's checksum utility.",
+%%% }
+%%% ====================================================================
+
+@Preamble{"\input bibnames.sty " # "\input path.sty " # "\def \TM {${}^{\sc TM}$} " # "\hyphenation{ al-pha-mer-ic Balz-er Blom-quist Bo-ta-fo-go Bran-din Brans-comb Bu-tera Chris-tina Christ-o-fi-des Col-lins Cor-dell data-base econ-omies Fletch-er
+ flow-chart flow-charts Fry-styk ge-dank-en Gar-fink-el Ge-ha-ni Glush-ko Goud-reau Gua-dan-go Hari-di Haw-thorn Hem-men-ding-er Hor-o-witz Hour-vitz Hirsch-berg Ike-da Ka-chi-tvi-chyan-u-kul Kat-ze-nel-son Kitz-miller Ko-ba-yashi Le-Me-tay-er Ken-ne-dy
+ Law-rence Mac-kay Mai-net-ti Mar-sa-glia Max-well Mer-ner Mo-ran-di Na-ray-an New-ell Nich-ols para-digm pat-ent-ed Phi-lo-kyp-rou Prep-a-ra-ta pseu-do-chain-ing QUIK-SCRIPT Rad-e-mach-er re-eval-u-a-tion re-wind Ros-witha Scheu-er-mann Schwach-heim
+ Schob-bens Schon-berg Sho-sha-ni Si-tha-ra-ma Skwa-rec-ki Streck-er Strin-gi-ni Tes-ler Te-zu-ka Teu-ho-la Till-quist Town-send Tsi-chri-tzis Tur-ski Vuille-min Wald-ing-er Za-bo-row-ski Za-mora }"}
+
+%=======================================================================
+% Acknowledgement abbreviations:
+
+@String{ack-nhfb = "Nelson H. F. Beebe, Center for Scientific Computing, Department of Mathematics, University of Utah, Salt Lake City, UT 84112, USA, Tel: +1 801 581 5254, FAX: +1 801 581 4148, e-mail: \path|beebe@math.utah.edu|"}
+
+@String{ack-nj = "Norbert Juffa, 2445 Mission College Blvd. Santa Clara, CA 95054 USA email: \path=norbert@iit.com="}
+
+%=======================================================================
+% Journal abbreviations:
+
+@String{j-CACM = "Communications of the ACM"}
+
+@String{j-COMP-SURV = "Computing Surveys"}
+
+@String{j-J-ACM = "Journal of the ACM"}
+
+@String{j-MANAGEMENT-SCIENCE = "Management Science"}
+
+@String{j-SIAM-J-COMPUT = "SIAM Journal of Computing"}
+
+@String{j-SPE = "Software --- Practice and Experience"}
+
+@String{j-TOMS = "ACM Transactions on Mathematical Software"}
+
+%=======================================================================
+% Publisher abbreviations:
+
+@String{pub-ANSI = "American National Standards Institute"}
+
+@String{pub-ANSI:adr = "1430 Broadway, New York, NY 10018, USA"}
+
+@String{pub-AW = "Ad{\-d}i{\-s}on-Wes{\-l}ey"}
+
+@String{pub-AW:adr = "Reading, MA, USA"}
+
+@String{pub-SUCSLI = "Stanford University Center for the Study of Language and Information"}
+
+@String{pub-SUCSLI:adr = "Stanford, CA, USA"}
+
+@String{pub-SV = "Spring{\-}er-Ver{\-}lag"}
+
+@String{pub-SV:adr = "Berlin, Germany~/ Heidelberg, Germany~/ London, UK~/ etc."}
+@MastersThesis{Dittmer:1976:IEP,
+ author = "Ingo Dittmer",
+ title = "{Implementation eines Einschrittcompilers f{\"u}r die Progammiersprache PASCAL auf der Rechenanlage IBM\slash 360 der Universit{\"a}t M{\"u}nster}. ({English} title: Implementation of a One-Step Compiler for the Programming Language
+ {PASCAL} on the {IBM}\slash 360 of the {University of Muenster})",
+ type = "Diplomearbeit",
+ school = "Universit{\"a}t M{\"u}nster",
+ address = "M{\"u}nster, Germany",
+ pages = "??",
+ month = "??",
+ year = "1976",
+ bibdate = "Sat Feb 17 13:24:29 1996",
+ note = "Diplomearbeit M{\"u}nster 1976 und doert angegebene Literatur (English: Muenster diploma work 1976 and the literature cited therein). The hashing method was rediscovered fourteen years later by Pearson \cite{Pearson:1990:FHV}, and then
+ commented on by several authors \cite{Dittmer:1991:NFH,Savoy:1991:NFH,Litsios:1991:NFH,Pearson:1991:NFH}.",
+ acknowledgement = ack-nhfb,
+ xxnote = "Cannot find in Dissertation Abstracts, European.",
+}
diff --git a/contrib/awk/test/funstack.ok b/contrib/awk/test/funstack.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/funstack.ok
diff --git a/contrib/awk/test/gensub.awk b/contrib/awk/test/gensub.awk
new file mode 100644
index 0000000..f91d84d
--- /dev/null
+++ b/contrib/awk/test/gensub.awk
@@ -0,0 +1,7 @@
+BEGIN { a = "this is a test of gawk"
+ b = gensub(/(this).*(test).*(gawk)/, "3 = <\\3>, 2 = <\\2>, 1 = <\\1>", 1, a) # \\N inserts the text matched by the Nth parenthesized group
+ print b
+}
+NR == 1 { print gensub(/b/, "BB", 2) } # numeric third argument: replace only the 2nd match; no target given, so the record is used
+NR == 2 { print gensub(/c/, "CC", "global") } # "global": replace every match in the record
+END { print gensub(/foo/, "bar", 1, "DON'T PANIC") } # no match: the target string comes back unchanged
diff --git a/contrib/awk/test/gensub.in b/contrib/awk/test/gensub.in
new file mode 100644
index 0000000..96c9faf
--- /dev/null
+++ b/contrib/awk/test/gensub.in
@@ -0,0 +1,2 @@
+a b c a b c a b c
+a b c a b c a b c
diff --git a/contrib/awk/test/gensub.ok b/contrib/awk/test/gensub.ok
new file mode 100644
index 0000000..b9ea3de
--- /dev/null
+++ b/contrib/awk/test/gensub.ok
@@ -0,0 +1,4 @@
+3 = <gawk>, 2 = <test>, 1 = <this>
+a b c a BB c a b c
+a b CC a b CC a b CC
+DON'T PANIC
diff --git a/contrib/awk/test/getline.awk b/contrib/awk/test/getline.awk
new file mode 100644
index 0000000..f4e413f
--- /dev/null
+++ b/contrib/awk/test/getline.awk
@@ -0,0 +1 @@
+BEGIN { while( getline > 0) { print } }
diff --git a/contrib/awk/test/getline.ok b/contrib/awk/test/getline.ok
new file mode 100644
index 0000000..9b7f2b9
--- /dev/null
+++ b/contrib/awk/test/getline.ok
@@ -0,0 +1,2 @@
+BEGIN { while( getline > 0) { print } }
+BEGIN { while( getline > 0) { print } }
diff --git a/contrib/awk/test/getlnhd.awk b/contrib/awk/test/getlnhd.awk
new file mode 100644
index 0000000..f0f801b
--- /dev/null
+++ b/contrib/awk/test/getlnhd.awk
@@ -0,0 +1,10 @@
+BEGIN { pipe = "cat <<EOF\n" # the command string is a shell here-document spread over several lines
+ pipe = pipe "select * from user\n"
+ pipe = pipe " where Name = 'O\\'Donell'\n"
+ pipe = pipe "EOF\n"
+
+ while ((pipe | getline) > 0) # read the command's output record by record
+ print
+
+ exit 0
+}
diff --git a/contrib/awk/test/getlnhd.ok b/contrib/awk/test/getlnhd.ok
new file mode 100644
index 0000000..d8cb453
--- /dev/null
+++ b/contrib/awk/test/getlnhd.ok
@@ -0,0 +1,2 @@
+select * from user
+ where Name = 'O\'Donell'
diff --git a/contrib/awk/test/gnureops.awk b/contrib/awk/test/gnureops.awk
new file mode 100644
index 0000000..15b9b84
--- /dev/null
+++ b/contrib/awk/test/gnureops.awk
@@ -0,0 +1,45 @@
+# test the gnu regexp ops
+
+BEGIN { # \y: empty string at the beginning or the end of a word
+ if ("a rat is here" ~ /\yrat/) print "test 1 ok (\\y)"
+ else print "test 1 failed (\\y)"
+ if ("a rat is here" ~ /rat\y/) print "test 2 ok (\\y)"
+ else print "test 2 failed (\\y)"
+ if ("what a brat" !~ /\yrat/) print "test 3 ok (\\y)"
+ else print "test 3 failed (\\y)"
+ # \B: empty string inside a word (not at a word edge)
+ if ("in the crate" ~ /\Brat/) print "test 4 ok (\\B)"
+ else print "test 4 failed (\\B)"
+ if ("a rat" !~ /\Brat/) print "test 5 ok (\\B)"
+ else print "test 5 failed (\\B)"
+ # \<: empty string at the beginning of a word
+ if ("a word" ~ /\<word/) print "test 6 ok (\\<)"
+ else print "test 6 failed (\\<)"
+ if ("foreword" !~ /\<word/) print "test 7 ok (\\<)"
+ else print "test 7 failed (\\<)"
+ # \>: empty string at the end of a word
+ if ("a word" ~ /word\>/) print "test 8 ok (\\>)"
+ else print "test 8 failed (\\>)"
+ if ("wordy" !~ /word\>/) print "test 9 ok (\\>)"
+ else print "test 9 failed (\\>)"
+ # \w: any word-constituent character
+ if ("a" ~ /\w/) print "test 10 ok (\\w)"
+ else print "test 10 failed (\\w)"
+ if ("+" !~ /\w/) print "test 11 ok (\\w)"
+ else print "test 11 failed (\\w)"
+ # \W: any character that is not word-constituent
+ if ("a" !~ /\W/) print "test 12 ok (\\W)"
+ else print "test 12 failed (\\W)"
+ if ("+" ~ /\W/) print "test 13 ok (\\W)"
+ else print "test 13 failed (\\W)"
+ # \`: empty string at the beginning of the string being matched
+ if ("a" ~ /\`a/) print "test 14 ok (\\`)"
+ else print "test 14 failed (\\`)"
+ if ("b" !~ /\`a/) print "test 15 ok (\\`)"
+ else print "test 15 failed (\\`)"
+ # \': empty string at the end of the string being matched
+ if ("a" ~ /a\'/) print "test 16 ok (\\')"
+ else print "test 16 failed (\\')"
+ if ("b" !~ /a\'/) print "test 17 ok (\\')"
+ else print "test 17 failed (\\')"
+}
diff --git a/contrib/awk/test/gnureops.ok b/contrib/awk/test/gnureops.ok
new file mode 100644
index 0000000..0fb5f50
--- /dev/null
+++ b/contrib/awk/test/gnureops.ok
@@ -0,0 +1,17 @@
+test 1 ok (\y)
+test 2 ok (\y)
+test 3 ok (\y)
+test 4 ok (\B)
+test 5 ok (\B)
+test 6 ok (\<)
+test 7 ok (\<)
+test 8 ok (\>)
+test 9 ok (\>)
+test 10 ok (\w)
+test 11 ok (\w)
+test 12 ok (\W)
+test 13 ok (\W)
+test 14 ok (\`)
+test 15 ok (\`)
+test 16 ok (\')
+test 17 ok (\')
diff --git a/contrib/awk/test/gsubasgn.awk b/contrib/awk/test/gsubasgn.awk
new file mode 100644
index 0000000..f0b7701
--- /dev/null
+++ b/contrib/awk/test/gsubasgn.awk
@@ -0,0 +1,13 @@
+# tests for assigning to a function within that function
+
+#1 - should be bad
+function test1 (r) { gsub(r, "x", test1) }
+BEGIN { test1("") }
+
+#2 - should be bad
+function test2 () { gsub(/a/, "x", test2) }
+BEGIN { test2() }
+
+#3 - should be ok
+function test3 (r) { gsub(/a/, "x", r) }
+BEGIN { test3("") }
diff --git a/contrib/awk/test/gsubasgn.ok b/contrib/awk/test/gsubasgn.ok
new file mode 100644
index 0000000..dfa6fbc
--- /dev/null
+++ b/contrib/awk/test/gsubasgn.ok
@@ -0,0 +1,4 @@
+gawk: gsubasgn.awk:4: function test1 (r) { gsub(r, "x", test1) }
+gawk: gsubasgn.awk:4: ^ gsub third parameter is not a changeable object
+gawk: gsubasgn.awk:8: function test2 () { gsub(/a/, "x", test2) }
+gawk: gsubasgn.awk:8: ^ gsub third parameter is not a changeable object
diff --git a/contrib/awk/test/gsubtest.awk b/contrib/awk/test/gsubtest.awk
new file mode 100755
index 0000000..3137479
--- /dev/null
+++ b/contrib/awk/test/gsubtest.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ str = "abc"; gsub("b+", "FOO", str); print str # expect aFOOc
+ str = "abc"; gsub("x*", "X", str); print str # expect XaXbXcX: only empty matches, one at every position
+ str = "abc"; gsub("b*", "X", str); print str # expect XaXcX
+ str = "abc"; gsub("c", "X", str); print str # expect abX
+ str = "abc"; gsub("c+", "X", str); print str # expect abX
+ str = "abc"; gsub("x*$", "X", str); print str # expect abcX: single empty match at end of string
+}
diff --git a/contrib/awk/test/gsubtest.ok b/contrib/awk/test/gsubtest.ok
new file mode 100644
index 0000000..191bebd
--- /dev/null
+++ b/contrib/awk/test/gsubtest.ok
@@ -0,0 +1,6 @@
+aFOOc
+XaXbXcX
+XaXcX
+abX
+abX
+abcX
diff --git a/contrib/awk/test/igncfs.awk b/contrib/awk/test/igncfs.awk
new file mode 100644
index 0000000..ebb58b2
--- /dev/null
+++ b/contrib/awk/test/igncfs.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ IGNORECASE=1 # with this set, the FS regexp below must not split on upper-case letters either
+ FS="[^a-z]+"
+}
+{
+ for (i=1; i<NF; i++) printf "%s, ", $i # every field but the last, each followed by ", "
+ printf "%s\n", $NF # last field ends the output line
+}
diff --git a/contrib/awk/test/igncfs.in b/contrib/awk/test/igncfs.in
new file mode 100644
index 0000000..5598017
--- /dev/null
+++ b/contrib/awk/test/igncfs.in
@@ -0,0 +1,2 @@
+this is handled ok
+This is Not hanDLed Well
diff --git a/contrib/awk/test/igncfs.ok b/contrib/awk/test/igncfs.ok
new file mode 100644
index 0000000..41df9a4
--- /dev/null
+++ b/contrib/awk/test/igncfs.ok
@@ -0,0 +1,2 @@
+this, is, handled, ok
+This, is, Not, hanDLed, Well
diff --git a/contrib/awk/test/ignrcase.ok b/contrib/awk/test/ignrcase.ok
new file mode 100644
index 0000000..d66e95c
--- /dev/null
+++ b/contrib/awk/test/ignrcase.ok
@@ -0,0 +1 @@
+xz
diff --git a/contrib/awk/test/inftest.awk b/contrib/awk/test/inftest.awk
new file mode 100644
index 0000000..ec0eda1
--- /dev/null
+++ b/contrib/awk/test/inftest.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ x = 100
+ do { y = x ; x *= 1000; print x,y } while ( y != x ) # ends once multiplying no longer changes x; inftest.ok shows that at Inf
+ print "loop terminated"
+}
diff --git a/contrib/awk/test/inftest.ok b/contrib/awk/test/inftest.ok
new file mode 100644
index 0000000..83a93d0
--- /dev/null
+++ b/contrib/awk/test/inftest.ok
@@ -0,0 +1,105 @@
+100000 100
+100000000 100000
+1e+11 100000000
+1e+14 1e+11
+1e+17 1e+14
+1e+20 1e+17
+1e+23 1e+20
+1e+26 1e+23
+1e+29 1e+26
+1e+32 1e+29
+1e+35 1e+32
+1e+38 1e+35
+1e+41 1e+38
+1e+44 1e+41
+1e+47 1e+44
+1e+50 1e+47
+1e+53 1e+50
+1e+56 1e+53
+1e+59 1e+56
+1e+62 1e+59
+1e+65 1e+62
+1e+68 1e+65
+1e+71 1e+68
+1e+74 1e+71
+1e+77 1e+74
+1e+80 1e+77
+1e+83 1e+80
+1e+86 1e+83
+1e+89 1e+86
+1e+92 1e+89
+1e+95 1e+92
+1e+98 1e+95
+1e+101 1e+98
+1e+104 1e+101
+1e+107 1e+104
+1e+110 1e+107
+1e+113 1e+110
+1e+116 1e+113
+1e+119 1e+116
+1e+122 1e+119
+1e+125 1e+122
+1e+128 1e+125
+1e+131 1e+128
+1e+134 1e+131
+1e+137 1e+134
+1e+140 1e+137
+1e+143 1e+140
+1e+146 1e+143
+1e+149 1e+146
+1e+152 1e+149
+1e+155 1e+152
+1e+158 1e+155
+1e+161 1e+158
+1e+164 1e+161
+1e+167 1e+164
+1e+170 1e+167
+1e+173 1e+170
+1e+176 1e+173
+1e+179 1e+176
+1e+182 1e+179
+1e+185 1e+182
+1e+188 1e+185
+1e+191 1e+188
+1e+194 1e+191
+1e+197 1e+194
+1e+200 1e+197
+1e+203 1e+200
+1e+206 1e+203
+1e+209 1e+206
+1e+212 1e+209
+1e+215 1e+212
+1e+218 1e+215
+1e+221 1e+218
+1e+224 1e+221
+1e+227 1e+224
+1e+230 1e+227
+1e+233 1e+230
+1e+236 1e+233
+1e+239 1e+236
+1e+242 1e+239
+1e+245 1e+242
+1e+248 1e+245
+1e+251 1e+248
+1e+254 1e+251
+1e+257 1e+254
+1e+260 1e+257
+1e+263 1e+260
+1e+266 1e+263
+1e+269 1e+266
+1e+272 1e+269
+1e+275 1e+272
+1e+278 1e+275
+1e+281 1e+278
+1e+284 1e+281
+1e+287 1e+284
+1e+290 1e+287
+1e+293 1e+290
+1e+296 1e+293
+1e+299 1e+296
+1e+302 1e+299
+1e+305 1e+302
+1e+308 1e+305
+Inf 1e+308
+Inf Inf
+loop terminated
diff --git a/contrib/awk/test/intest.awk b/contrib/awk/test/intest.awk
new file mode 100644
index 0000000..f030d07
--- /dev/null
+++ b/contrib/awk/test/intest.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ bool = ((b = 1) in c); # the subscript is an assignment: c has no elements so bool is 0, but b must still be set
+ print bool, b # gawk-3.0.1 prints "0 "; should print "0 1"
+}
diff --git a/contrib/awk/test/intest.ok b/contrib/awk/test/intest.ok
new file mode 100644
index 0000000..6e8183b
--- /dev/null
+++ b/contrib/awk/test/intest.ok
@@ -0,0 +1 @@
+0 1
diff --git a/contrib/awk/test/intprec.awk b/contrib/awk/test/intprec.awk
new file mode 100644
index 0000000..978e9ea
--- /dev/null
+++ b/contrib/awk/test/intprec.awk
@@ -0,0 +1 @@
+BEGIN { printf "%.10d:%.10x\n", 5, 14 } # a precision on %d/%x zero-pads to that many digits; expect 0000000005:000000000e
diff --git a/contrib/awk/test/intprec.ok b/contrib/awk/test/intprec.ok
new file mode 100644
index 0000000..8783fac
--- /dev/null
+++ b/contrib/awk/test/intprec.ok
@@ -0,0 +1 @@
+0000000005:000000000e
diff --git a/contrib/awk/test/lib/awkpath.awk b/contrib/awk/test/lib/awkpath.awk
new file mode 100644
index 0000000..6663ca4
--- /dev/null
+++ b/contrib/awk/test/lib/awkpath.awk
@@ -0,0 +1 @@
+BEGIN { print "Found it." }
diff --git a/contrib/awk/test/litoct.awk b/contrib/awk/test/litoct.awk
new file mode 100644
index 0000000..5cfc128
--- /dev/null
+++ b/contrib/awk/test/litoct.awk
@@ -0,0 +1 @@
+{ if (/a\52b/) print "match" ; else print "no match" } # \52 is the octal code of "*"; litoct.ok expects "no match" -- NOTE(review): test input is not visible here
diff --git a/contrib/awk/test/litoct.ok b/contrib/awk/test/litoct.ok
new file mode 100644
index 0000000..4c0be97
--- /dev/null
+++ b/contrib/awk/test/litoct.ok
@@ -0,0 +1 @@
+no match
diff --git a/contrib/awk/test/longwrds.awk b/contrib/awk/test/longwrds.awk
new file mode 100644
index 0000000..f6a7816
--- /dev/null
+++ b/contrib/awk/test/longwrds.awk
@@ -0,0 +1,20 @@
+# From Gawk Manual modified by bug fix and removal of punctuation
+# Record every word which is used at least once
+{
+ for (i = 1; i <= NF; i++) {
+ tmp = tolower($i) # fold case so differently-cased spellings share one entry
+ if (0 != (pos = match(tmp, /([a-z]|-)+/))) # first run of letters and hyphens in the field, if any
+ used[substr(tmp, pos, RLENGTH)] = 1 # RLENGTH is the length of the match just found
+ }
+}
+
+#Find a number of distinct words longer than 10 characters
+END {
+ num_long_words = 0
+ for (x in used) # array traversal order is unspecified; longwrds.ok is sorted, presumably by the test harness
+ if (length(x) > 10) { # "long" means 11 or more characters
+ ++num_long_words
+ print x
+ }
+ print num_long_words, "long words"
+}
diff --git a/contrib/awk/test/longwrds.ok b/contrib/awk/test/longwrds.ok
new file mode 100644
index 0000000..01faa84
--- /dev/null
+++ b/contrib/awk/test/longwrds.ok
@@ -0,0 +1,21 @@
+20 long words
+compatibility
+concatenated
+consistency
+definitions
+description
+distributing
+fistatements
+gawk-options
+gnu-specific
+identically
+implementation
+implementations
+information
+non-portable
+pattern-action
+pre-defined
+program-file
+program-text
+programming
+restrictions
diff --git a/contrib/awk/test/manpage b/contrib/awk/test/manpage
new file mode 100644
index 0000000..09c3948
--- /dev/null
+++ b/contrib/awk/test/manpage
@@ -0,0 +1,200 @@
+.ds PX \s-1POSIX\s+1
+.ds UX \s-1UNIX\s+1
+.ds AN \s-1ANSI\s+1
+.TH GAWK 1 "May 28 1991" "Free Software Foundation" "Utility Commands"
+.SH NAME
+gawk \- pattern scanning and processing language
+.SH SYNOPSIS
+.B gawk
+[
+.B \-W
+.I gawk-options
+] [
+.BI \-F\^ fs
+] [
+.B \-v
+.IR var = val
+]
+.B \-f
+.I program-file
+[
+.B \-\^\-
+] file .\^.\^.
+.br
+.B gawk
+[
+.B \-W
+.I gawk-options
+] [
+.BI \-F\^ fs
+] [
+.B \-v
+.IR var = val
+] [
+.B \-\^\-
+]
+.I program-text
+file .\^.\^.
+.SH DESCRIPTION
+.I Gawk
+is the GNU Project's implementation of the AWK programming language.
+It conforms to the definition of the language in
+the \*(PX 1003.2 Command Language And Utilities Standard
+(draft 11).
+This version in turn is based on the description in
+.IR "The AWK Programming Language" ,
+by Aho, Kernighan, and Weinberger,
+with the additional features defined in the System V Release 4 version
+of \*(UX
+.IR awk .
+.I Gawk
+also provides some GNU-specific extensions.
+.PP
+The command line consists of options to
+.I gawk
+itself, the AWK program text (if not supplied via the
+.B \-f
+option), and values to be made
+available in the
+.B ARGC
+and
+.B ARGV
+pre-defined AWK variables.
+.SH OPTIONS
+.PP
+.I Gawk
+accepts the following options, which should be available on any implementation
+of the AWK language.
+.TP
+.BI \-F fs
+Use
+.I fs
+for the input field separator (the value of the
+.B FS
+predefined
+variable).
+.TP
+\fB\-v\fI var\fR\^=\^\fIval\fR
+Assign the value
+.IR val ,
+to the variable
+.IR var ,
+before execution of the program begins.
+Such variable values are available to the
+.B BEGIN
+block of an AWK program.
+.TP
+.BI \-f " program-file"
+Read the AWK program source from the file
+.IR program-file ,
+instead of from the first command line argument.
+Multiple
+.B \-f
+options may be used.
+.TP
+.B \-\^\-
+Signal the end of options. This is useful to allow further arguments to the
+AWK program itself to start with a ``\-''.
+This is mainly for consistency with the argument parsing convention used
+by most other \*(PX programs.
+.PP
+Following the \*(PX standard,
+.IR gawk -specific
+options are supplied via arguments to the
+.B \-W
+option. Multiple
+.B \-W
+options may be supplied, or multiple arguments may be supplied together
+if they are separated by commas, or enclosed in quotes and separated
+by white space.
+Case is ignored in arguments to the
+.B \-W
+option.
+.PP
+The
+.B \-W
+option accepts the following arguments:
+.TP \w'\fBcopyright\fR'u+1n
+.B compat
+Run in
+.I compatibility
+mode. In compatibility mode,
+.I gawk
+behaves identically to \*(UX
+.IR awk ;
+none of the GNU-specific extensions are recognized.
+.TP
+.PD 0
+.B copyleft
+.TP
+.PD
+.B copyright
+Print the short version of the GNU copyright information message on
+the error output.
+.TP
+.B lint
+Provide warnings about constructs that are
+dubious or non-portable to other AWK implementations.
+.TP
+.B posix
+This turns on
+.I compatibility
+mode, with the following additional restrictions:
+.RS
+.TP \w'\(bu'u+1n
+\(bu
+.B \ex
+escape sequences are not recognized.
+.TP
+\(bu
+The synonym
+.B func
+for the keyword
+.B function
+is not recognized.
+.TP
+\(bu
+The operators
+.B **
+and
+.B **=
+cannot be used in place of
+.B ^
+and
+.BR ^= .
+.RE
+.TP
+.B version
+Print version information for this particular copy of
+.I gawk
+on the error output.
+This is useful mainly for knowing if the current copy of
+.I gawk
+on your system
+is up to date with respect to whatever the Free Software Foundation
+is distributing.
+.PP
+Any other options are flagged as illegal, but are otherwise ignored.
+.SH AWK PROGRAM EXECUTION
+.PP
+An AWK program consists of a sequence of pattern-action statements
+and optional function definitions.
+.RS
+.PP
+\fIpattern\fB { \fIaction statements\fB }\fR
+.br
+\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR
+.RE
+.PP
+.I Gawk
+first reads the program source from the
+.IR program-file (s)
+if specified, or from the first non-option argument on the command line.
+The
+.B \-f
+option may be used multiple times on the command line.
+.I Gawk
+will read the program text as if all the
+.IR program-file s
+had been concatenated together. This is useful for building libraries
+of AWK functions, without having to include them in each new AWK
diff --git a/contrib/awk/test/manyfiles.awk b/contrib/awk/test/manyfiles.awk
new file mode 100644
index 0000000..8651a3a
--- /dev/null
+++ b/contrib/awk/test/manyfiles.awk
@@ -0,0 +1 @@
+{ print $2 > ("junk/" $1) } # write field 2 to the file junk/<field 1>, so each distinct $1 names its own output file
diff --git a/contrib/awk/test/math.awk b/contrib/awk/test/math.awk
new file mode 100644
index 0000000..90a01dd
--- /dev/null
+++ b/contrib/awk/test/math.awk
@@ -0,0 +1,10 @@
+BEGIN {
+ pi = 3.1415927 # approximation; all results below are printed with %f
+ printf "cos(%f) = %f\n", pi/4, cos(pi/4)
+ printf "sin(%f) = %f\n", pi/4, sin(pi/4)
+ e = exp(1) # e obtained from the exp() builtin
+ printf "e = %f\n", e
+ printf "log(e) = %f\n", log(e) # math.ok expects 1.000000
+ printf "sqrt(pi ^ 2) = %f\n", sqrt(pi ^ 2) # math.ok expects 3.141593
+ printf "atan2(1, 1) = %f\n", atan2(1, 1) # math.ok expects 0.785398, the same value as pi/4 above
+}
diff --git a/contrib/awk/test/math.ok b/contrib/awk/test/math.ok
new file mode 100644
index 0000000..a396a5b
--- /dev/null
+++ b/contrib/awk/test/math.ok
@@ -0,0 +1,6 @@
+cos(0.785398) = 0.707107
+sin(0.785398) = 0.707107
+e = 2.718282
+log(e) = 1.000000
+sqrt(pi ^ 2) = 3.141593
+atan2(1, 1) = 0.785398
diff --git a/contrib/awk/test/messages.awk b/contrib/awk/test/messages.awk
new file mode 100644
index 0000000..555f6e3
--- /dev/null
+++ b/contrib/awk/test/messages.awk
@@ -0,0 +1,9 @@
+# This is a demo of different ways of printing with gawk. Try it
+# with and without -c (compatibility) flag, redirecting output
+# from gawk to a file or not. Some results can be quite unexpected.
+BEGIN {
+ print "Goes to a file out1" > "out1"
+ print "Normal print statement"
+ print "This printed on stdout" > "/dev/stdout"
+ print "You blew it!" > "/dev/stderr"
+}
diff --git a/contrib/awk/test/mmap8k.in b/contrib/awk/test/mmap8k.in
new file mode 100644
index 0000000..0500ddf
--- /dev/null
+++ b/contrib/awk/test/mmap8k.in
@@ -0,0 +1,143 @@
+XXXXXXXX.com ALTERNET 9305 930528 1500.00 startup
+XXXXXXXX.com ALTERNET 9305 930624 94.38 Line-9305
+XXXXXXXX.com ALTERNET 9306 930624 104.49 Line-9306
+XXXXXXXX.com ALTERNET 9306 930624 649.16 Line-install
+XXXXXXXX.com ALTERNET 9306 930624 166.67 TCP-slip
+XXXXXXXX.com ALTERNET 9307 930624 104.49 Line-9307
+XXXXXXXX.com ALTERNET 9307 930624 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9308 930701 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9308 930701 104.49 line-9308
+XXXXXXXX.com PAYMENT 9307 930731 1500.00 1870
+XXXXXXXX.com ALTERNET 9309 930801 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9309 930801 104.49 line-9309
+XXXXXXXX.com INTEREST 9307 930801 22.50
+XXXXXXXX.com CREDADJ 9308 930805 22.50 waive interest
+XXXXXXXX.com PAYMENT 9308 930820 1723.68 1982
+XXXXXXXX.com ALTERNET 9310 930901 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9310 930901 104.49 line-9310
+XXXXXXXX.com PAYMENT 9310 931001 708.98 2313
+XXXXXXXX.com ALTERNET 9311 931001 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9311 931001 104.49 line-9311
+XXXXXXXX.com INTEREST 9309 931001 5.32
+XXXXXXXX.com CREDADJ 9310 931007 5.32 waive int-9309
+XXXXXXXX.com ALTERNET 9312 931101 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9312 931101 104.49 line-9312
+XXXXXXXX.com PAYMENT 9311 931120 354.49 002701
+XXXXXXXX.com ALTERNET 9401 931201 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9401 931201 104.49 line-9401
+XXXXXXXX.com PAYMENT 9312 931218 354.49 2884
+XXXXXXXX.com ALTERNET 9402 940101 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9402 940101 104.49 line-9402
+XXXXXXXX.com INTEREST 9312 940101 5.32
+XXXXXXXX.com PAYMENT 9401 940122 354.49 3084
+XXXXXXXX.com ALTERNET 9403 940201 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9403 940201 104.49 line-9403
+XXXXXXXX.com INTEREST 9401 940201 5.40
+XXXXXXXX.com PAYMENT 9402 940207 354.49 3140
+XXXXXXXX.com CREDADJ 9402 940211 5.32 interest-9402
+XXXXXXXX.com CREDADJ 9402 940211 5.40 interest-9403
+XXXXXXXX.com ALTERNET 9404 940301 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9404 940301 104.49 line-9404
+XXXXXXXX.com INTEREST 9402 940301 5.32
+XXXXXXXX.com PAYMENT 9403 940310 354.49 003307
+XXXXXXXX.com PAYMENT 9403 940324 354.49 3446
+XXXXXXXX.com ALTERNET 9405 940401 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9405 940401 104.49 line-9405
+XXXXXXXX.com ALTERNET 9406 940501 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9406 940501 104.49 line-9406
+XXXXXXXX.com INTEREST 9404 940501 5.40
+XXXXXXXX.com PAYMENT 9405 940509 359.81 003819
+XXXXXXXX.com ALTERNET 9407 940601 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9407 940601 104.49 line-9407
+XXXXXXXX.com INTEREST 9405 940601 5.40
+XXXXXXXX.com PAYMENT 9406 940603 354.49 004025
+XXXXXXXX.com ALTERNET 9408 940701 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9408 940701 104.49 line-9408
+XXXXXXXX.com INTEREST 9406 940701 5.48
+XXXXXXXX.com PAYMENT 9407 940725 354.49 004350
+XXXXXXXX.com ALTERNET 9409 940801 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9409 940801 104.49 line-9409
+XXXXXXXX.com INTEREST 9407 940801 5.56
+XXXXXXXX.com PAYMENT 9408 940808 354.49 004454
+XXXXXXXX.com ALTERNET 9409 940811 0.00 startup
+XXXXXXXX.com EQUIPMENT 9408 940831 399.00 ATL6402-1
+XXXXXXXX.com EQUIPMENT 9408 940831 2295.00 NBClassicPac-1
+XXXXXXXX.com EQUIPMENT 9408 940831 1060.00 Syn35-1+ship
+XXXXXXXX.com ALTERNET 9410 940901 250.00 TCP-slip
+XXXXXXXX.com ALTERNET 9410 940901 104.49 line-9410
+XXXXXXXX.com INTEREST 9408 940901 5.64
+XXXXXXXX.com PAYMENT 9409 940906 354.49 004677
+XXXXXXXX.com CREDADJ 9409 940921 124.95 TCP-slip-9409
+XXXXXXXX.com CREDADJ 9409 940921 52.20 line-9409
+XXXXXXXX.com CREDADJ 9410 940921 250.00 TCP-slip-9410
+XXXXXXXX.com CREDADJ 9410 940921 104.49 line-9410
+XXXXXXXX.com ALTERNET 9409 940921 397.50 TCP-56k-local recon
+XXXXXXXX.com ALTERNET 9409 940921 87.45 line-9409 recon
+XXXXXXXX.com ALTERNET 9410 940921 795.00 TCP-56k-local recon
+XXXXXXXX.com ALTERNET 9410 940921 174.90 line-9410 recon
+XXXXXXXX.com ALTERNET 9411 941001 795.00 TCP-56k-local
+XXXXXXXX.com ALTERNET 9411 941001 174.90 line-9411
+XXXXXXXX.com INTEREST 9409 941001 54.06
+XXXXXXXX.com PAYMENT 9410 941017 354.49 5026
+XXXXXXXX.com ALTERNET 9412 941101 795.00 TCP-56k-local
+XXXXXXXX.com ALTERNET 9412 941101 174.90 line-9412
+XXXXXXXX.com INTEREST 9410 941101 85.93
+XXXXXXXX.com PAYMENT 9411 941114 969.90 005274
+XXXXXXXX.com ALTERNET 9501 941201 795.00 TCP-56k-local
+XXXXXXXX.com ALTERNET 9501 941201 174.90 line-9501
+XXXXXXXX.com INTEREST 9411 941201 87.22
+XXXXXXXX.com PAYMENT 9412 941219 4723.90 5590
+XXXXXXXX.com ALTERNET 9502 950101 795.00 TCP-56k-local
+XXXXXXXX.com ALTERNET 9502 950101 174.90 line-9502
+XXXXXXXX.com INTEREST 9412 950101 32.22
+XXXXXXXX.com PAYMENT 9501 950103 1893.11 5766
+XXXXXXXX.com ALTERNET 9503 950201 795.00 TCP-56k-local-old
+XXXXXXXX.com ALTERNET 9503 950201 174.90 line-9503
+XXXXXXXX.com INTEREST 9501 950201 18.85
+XXXXXXXX.com PAYMENT 9502 950207 969.90 6044
+XXXXXXXX.com ALTERNET 9504 950301 795.00 TCP-56k-local-old
+XXXXXXXX.com ALTERNET 9504 950301 174.90 line-9504
+XXXXXXXX.com INTEREST 9502 950301 19.13
+XXXXXXXX.com PAYMENT 9503 950307 969.90 6408
+XXXXXXXX.com ALTERNET 9504 950316 3000.00 startup TCP-bt1-128k%5
+XXXXXXXX.com PAYMENT 9503 950327 969.90 6594
+XXXXXXXX.com ALTERNET 9505 950401 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9505 950401 556.60 line-9505
+XXXXXXXX.com EQUIPMENT 9504 950410 1595.00 cisco2501-1
+XXXXXXXX.com CREDADJ 9504 950417 503.50 TCP-56k-local
+XXXXXXXX.com CREDADJ 9504 950417 116.60 line-9504
+XXXXXXXX.com ALTERNET 9504 950417 448.80 line-install
+XXXXXXXX.com ALTERNET 9504 950417 752.02 TCP-bt1-128k%5 recon
+XXXXXXXX.com ALTERNET 9504 950417 371.00 line-9504 recon
+XXXXXXXX.com PAYMENT 9504 950424 3000.00 6841
+XXXXXXXX.com ALTERNET 9506 950501 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9506 950501 556.60 line-9506
+XXXXXXXX.com PAYMENT 9505 950505 2049.86 6985
+XXXXXXXX.com PAYMENT 9505 950531 3924.22 7179
+XXXXXXXX.com ALTERNET 9507 950601 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9507 950601 556.60 line-9507
+XXXXXXXX.com PAYMENT 9506 950607 1744.10 7232
+XXXXXXXX.com ALTERNET 9508 950701 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9508 950701 556.60 line-9508
+XXXXXXXX.com PAYMENT 9507 950705 1744.10 7641
+XXXXXXXX.com ALTERNET 9509 950801 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9509 950801 556.60 line-9509
+XXXXXXXX.com PAYMENT 9508 950803 1744.10 7914
+XXXXXXXX.com ALTERNET 9510 950901 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9510 950901 556.60 line-9510
+XXXXXXXX.com PAYMENT 9509 950905 1744.10 8203
+XXXXXXXX.com ALTERNET 9511 951001 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9511 951001 556.60 line-9511
+XXXXXXXX.com PAYMENT 9510 951003 1744.10 8508
+XXXXXXXX.com ALTERNET 9512 951101 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9512 951101 556.60 line-9512
+XXXXXXXX.com PAYMENT 9511 951103 2129.83 8837
+XXXXXXXX.com ALTERNET 9601 951201 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9601 951201 556.60 line-9601
+XXXXXXXX.com PAYMENT 9512 951204 2129.83 9131
+XXXXXXXX.com ALTERNET 9602 960101 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9602 960101 556.60 line-9602
+XXXXXXXX.com PAYMENT 9601 960103 1744.10 9456
+XXXXXXXX.com ALTERNET 9603 960201 1187.50 TCP-bt1-128k%5.00
+XXXXXXXX.com ALTERNET 9603 960201 556.60 line-9603
+XXXXXXXX.com PAYMENT 9602 960205 1358.37 9834
diff --git a/contrib/awk/test/negexp.ok b/contrib/awk/test/negexp.ok
new file mode 100644
index 0000000..6e6566c
--- /dev/null
+++ b/contrib/awk/test/negexp.ok
@@ -0,0 +1 @@
+0.01
diff --git a/contrib/awk/test/nfldstr.ok b/contrib/awk/test/nfldstr.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/nfldstr.ok
diff --git a/contrib/awk/test/nfset.awk b/contrib/awk/test/nfset.awk
new file mode 100644
index 0000000..09ebd08
--- /dev/null
+++ b/contrib/awk/test/nfset.awk
@@ -0,0 +1 @@
+{ NF = 5 ; print }
diff --git a/contrib/awk/test/nfset.in b/contrib/awk/test/nfset.in
new file mode 100644
index 0000000..43329b5
--- /dev/null
+++ b/contrib/awk/test/nfset.in
@@ -0,0 +1,5 @@
+1 2
+1 2 3 4
+1 2 3 4 5
+1 2 3 4 5 6 7 8
+1
diff --git a/contrib/awk/test/nfset.ok b/contrib/awk/test/nfset.ok
new file mode 100644
index 0000000..3ba48ae
--- /dev/null
+++ b/contrib/awk/test/nfset.ok
@@ -0,0 +1,5 @@
+1 2
+1 2 3 4
+1 2 3 4 5
+1 2 3 4 5
+1
diff --git a/contrib/awk/test/nlfldsep.awk b/contrib/awk/test/nlfldsep.awk
new file mode 100644
index 0000000..4fac81d
--- /dev/null
+++ b/contrib/awk/test/nlfldsep.awk
@@ -0,0 +1,2 @@
+BEGIN { RS = "A" }
+{print NF; for (i = 1; i <= NF; i++) print $i ; print ""}
diff --git a/contrib/awk/test/nlfldsep.in b/contrib/awk/test/nlfldsep.in
new file mode 100644
index 0000000..7b2317f
--- /dev/null
+++ b/contrib/awk/test/nlfldsep.in
@@ -0,0 +1,5 @@
+some stuff
+more stuffA
+junk
+stuffA
+final
diff --git a/contrib/awk/test/nlfldsep.ok b/contrib/awk/test/nlfldsep.ok
new file mode 100644
index 0000000..6684916
--- /dev/null
+++ b/contrib/awk/test/nlfldsep.ok
@@ -0,0 +1,13 @@
+4
+some
+stuff
+more
+stuff
+
+2
+junk
+stuff
+
+1
+final
+
diff --git a/contrib/awk/test/noeffect.awk b/contrib/awk/test/noeffect.awk
new file mode 100644
index 0000000..b375a4c
--- /dev/null
+++ b/contrib/awk/test/noeffect.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ s == "hello, world";
+ print s
+}
diff --git a/contrib/awk/test/noeffect.ok b/contrib/awk/test/noeffect.ok
new file mode 100644
index 0000000..b820ddf
--- /dev/null
+++ b/contrib/awk/test/noeffect.ok
@@ -0,0 +1,2 @@
+gawk: noeffect.awk:3: warning: statement may have no effect
+
diff --git a/contrib/awk/test/nofmtch.awk b/contrib/awk/test/nofmtch.awk
new file mode 100644
index 0000000..2ea2249
--- /dev/null
+++ b/contrib/awk/test/nofmtch.awk
@@ -0,0 +1 @@
+BEGIN { printf "%3\n" }
diff --git a/contrib/awk/test/nofmtch.ok b/contrib/awk/test/nofmtch.ok
new file mode 100644
index 0000000..e6f3846
--- /dev/null
+++ b/contrib/awk/test/nofmtch.ok
@@ -0,0 +1,2 @@
+gawk: nofmtch.awk:1: warning: printf format specifier does not have control letter
+%3
diff --git a/contrib/awk/test/nondec.awk b/contrib/awk/test/nondec.awk
new file mode 100644
index 0000000..a680110
--- /dev/null
+++ b/contrib/awk/test/nondec.awk
@@ -0,0 +1 @@
+BEGIN { print 0x81c3e8, 0x744018 }
diff --git a/contrib/awk/test/nondec.ok b/contrib/awk/test/nondec.ok
new file mode 100644
index 0000000..560f3d1
--- /dev/null
+++ b/contrib/awk/test/nondec.ok
@@ -0,0 +1 @@
+8504296 7618584
diff --git a/contrib/awk/test/nonl.awk b/contrib/awk/test/nonl.awk
new file mode 100644
index 0000000..c227083
--- /dev/null
+++ b/contrib/awk/test/nonl.awk
@@ -0,0 +1 @@
+0 \ No newline at end of file
diff --git a/contrib/awk/test/nonl.ok b/contrib/awk/test/nonl.ok
new file mode 100644
index 0000000..24bd9b7
--- /dev/null
+++ b/contrib/awk/test/nonl.ok
@@ -0,0 +1 @@
+gawk: nonl.awk:1: warning: source file does not end in newline
diff --git a/contrib/awk/test/noparms.awk b/contrib/awk/test/noparms.awk
new file mode 100644
index 0000000..2c7ccc7
--- /dev/null
+++ b/contrib/awk/test/noparms.awk
@@ -0,0 +1 @@
+function x(a, b, c , ,) {}
diff --git a/contrib/awk/test/noparms.ok b/contrib/awk/test/noparms.ok
new file mode 100644
index 0000000..4c934c3
--- /dev/null
+++ b/contrib/awk/test/noparms.ok
@@ -0,0 +1,4 @@
+gawk: noparms.awk:1: function x(a, b, c , ,) {}
+gawk: noparms.awk:1: ^ parse error
+gawk: noparms.awk:1: function x(a, b, c , ,) {}
+gawk: noparms.awk:1: ^ parse error
diff --git a/contrib/awk/test/nors.in b/contrib/awk/test/nors.in
new file mode 100644
index 0000000..f90d9ec
--- /dev/null
+++ b/contrib/awk/test/nors.in
@@ -0,0 +1 @@
+A B C D E \ No newline at end of file
diff --git a/contrib/awk/test/nors.ok b/contrib/awk/test/nors.ok
new file mode 100644
index 0000000..54d5aab
--- /dev/null
+++ b/contrib/awk/test/nors.ok
@@ -0,0 +1,2 @@
+E
+E
diff --git a/contrib/awk/test/numsubstr.awk b/contrib/awk/test/numsubstr.awk
new file mode 100644
index 0000000..7a30993
--- /dev/null
+++ b/contrib/awk/test/numsubstr.awk
@@ -0,0 +1 @@
+{ print substr(1000+$1, 2) }
diff --git a/contrib/awk/test/numsubstr.in b/contrib/awk/test/numsubstr.in
new file mode 100644
index 0000000..ac65c36
--- /dev/null
+++ b/contrib/awk/test/numsubstr.in
@@ -0,0 +1,3 @@
+5000
+10000
+5000
diff --git a/contrib/awk/test/numsubstr.ok b/contrib/awk/test/numsubstr.ok
new file mode 100644
index 0000000..86ec13c
--- /dev/null
+++ b/contrib/awk/test/numsubstr.ok
@@ -0,0 +1,3 @@
+000
+1000
+000
diff --git a/contrib/awk/test/out1.ok b/contrib/awk/test/out1.ok
new file mode 100644
index 0000000..f54b2b4
--- /dev/null
+++ b/contrib/awk/test/out1.ok
@@ -0,0 +1 @@
+Goes to a file out1
diff --git a/contrib/awk/test/out2.ok b/contrib/awk/test/out2.ok
new file mode 100644
index 0000000..66b7d2f
--- /dev/null
+++ b/contrib/awk/test/out2.ok
@@ -0,0 +1,2 @@
+Normal print statement
+This printed on stdout
diff --git a/contrib/awk/test/out3.ok b/contrib/awk/test/out3.ok
new file mode 100644
index 0000000..7eb822f
--- /dev/null
+++ b/contrib/awk/test/out3.ok
@@ -0,0 +1 @@
+You blew it!
diff --git a/contrib/awk/test/paramdup.awk b/contrib/awk/test/paramdup.awk
new file mode 100644
index 0000000..1f1cc7a
--- /dev/null
+++ b/contrib/awk/test/paramdup.awk
@@ -0,0 +1,8 @@
+BEGIN { foo(0, 1, 2) }
+
+function foo(a, b, c, b, a)
+{
+ print "a =", a
+ print "b =", b
+ print "c =", c
+}
diff --git a/contrib/awk/test/paramdup.ok b/contrib/awk/test/paramdup.ok
new file mode 100644
index 0000000..0308cc8
--- /dev/null
+++ b/contrib/awk/test/paramdup.ok
@@ -0,0 +1,2 @@
+gawk: paramdup.awk:4: error: function `foo': parameter #4, `b', duplicates parameter #2
+gawk: paramdup.awk:4: error: function `foo': parameter #5, `a', duplicates parameter #1
diff --git a/contrib/awk/test/pcntplus.awk b/contrib/awk/test/pcntplus.awk
new file mode 100644
index 0000000..13999ac
--- /dev/null
+++ b/contrib/awk/test/pcntplus.awk
@@ -0,0 +1 @@
+BEGIN { printf "%+d %d\n", 3, 4 }
diff --git a/contrib/awk/test/pcntplus.ok b/contrib/awk/test/pcntplus.ok
new file mode 100644
index 0000000..b790269
--- /dev/null
+++ b/contrib/awk/test/pcntplus.ok
@@ -0,0 +1 @@
++3 4
diff --git a/contrib/awk/test/pid.awk b/contrib/awk/test/pid.awk
new file mode 100644
index 0000000..9b47d90
--- /dev/null
+++ b/contrib/awk/test/pid.awk
@@ -0,0 +1,44 @@
+# From: John C. Oppenheimer <jco@slinky.convex.com>
+# Subject: gawk-3.0.2 pid test
+# To: arnold@skeeve.atl.ga.us
+# Date: Mon, 10 Feb 1997 08:31:55 -0600 (CST)
+#
+# Thanks for the very quick reply.
+#
+# This all started when I was looking for how to do the equivalent of
+# "nextfile." I was after documentation and found our gawk down a few
+# revs.
+#
+# Looks like the nextfile functionality was added somewhere around
+# 2.15.5. There wasn't a way to do it, until now! Thanks for the
+# functionality!
+#
+# Saw the /dev/xxx capability and just tried it.
+#
+# Anyway, I wrote a pid test. I hope that it is portable. Wanted to
+# make a user test, but looks like id(1) is not very portable. But a
+# little test is better than none.
+#
+# John
+#
+# pid.ok is a zero length file
+#
+# ================== pid.awk ============
+BEGIN {
+ getline pid <"/dev/pid"
+ getline ppid <"/dev/ppid"
+}
+NR == 1 {
+ if (pid != $0) {
+ printf "Bad pid %d, wanted %d\n", $0, pid
+ }
+}
+NR == 2 {
+ if (ppid != $0) {
+ printf "Bad ppid %d, wanted %d\n", $0, ppid
+ }
+}
+END { # ADR --- added
+ close("/dev/pid")
+ close("/dev/ppid")
+}
diff --git a/contrib/awk/test/pid.ok b/contrib/awk/test/pid.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/pid.ok
diff --git a/contrib/awk/test/pid.sh b/contrib/awk/test/pid.sh
new file mode 100755
index 0000000..a19d72c
--- /dev/null
+++ b/contrib/awk/test/pid.sh
@@ -0,0 +1,5 @@
+#! /bin/sh
+AWK=${AWK-../gawk}
+echo $$ > _pid.in
+echo $1 >> _pid.in
+exec $AWK -f pid.awk _pid.in
diff --git a/contrib/awk/test/pipeio1.awk b/contrib/awk/test/pipeio1.awk
new file mode 100644
index 0000000..66f50ad
--- /dev/null
+++ b/contrib/awk/test/pipeio1.awk
@@ -0,0 +1,31 @@
+# From dragon!gamgee.acad.emich.edu!dhw Tue Mar 18 01:12:15 1997
+# Return-Path: <dragon!gamgee.acad.emich.edu!dhw>
+# Message-ID: <m0w6owW-000IDSC@gamgee.acad.emich.edu>
+# Date: Mon, 17 Mar 97 20:48 CST
+# From: dhw@gamgee.acad.emich.edu (David H. West)
+# To: arnold@gnu.ai.mit.edu
+# Subject: gawk 3.0.2 bug report (cc of msg to bug-gnu-utils)
+# Status: OR
+# Content-Length: 869
+# X-Lines: 20
+# X-Display-Position: 2
+#
+# Nature of bug: operation on a pipe side-effects a different pipe.
+# Observed-With: gawk 3.0.2, Linux kernel 2.0.28
+# Reproduce-By: running the following script, without and with the "close"
+# statement uncommented.
+# -----------------cut here--------------------------
+BEGIN {FILE1="test1"; FILE2="test2";
+ print "1\n" > FILE1; close(FILE1);
+ print "2\n" > FILE2; close(FILE2);
+ cmd1="cat " FILE1; cmd2="cat " FILE2;
+ #end of preparing commands which give easily-predictable output
+
+ while( (cmd1 | getline)==1) { #terminates as file has only 1 line
+ #and we never close cmd1
+ cmd2 | getline L;
+ #BUG: uncommenting the following line causes an infinite loop
+ close(cmd2);
+ print $0,L;
+ }
+ }
diff --git a/contrib/awk/test/pipeio1.ok b/contrib/awk/test/pipeio1.ok
new file mode 100644
index 0000000..706b09e
--- /dev/null
+++ b/contrib/awk/test/pipeio1.ok
@@ -0,0 +1,2 @@
+1 2
+ 2
diff --git a/contrib/awk/test/pipeio2.awk b/contrib/awk/test/pipeio2.awk
new file mode 100644
index 0000000..6f4f979
--- /dev/null
+++ b/contrib/awk/test/pipeio2.awk
@@ -0,0 +1,67 @@
+# From: megaadm@rina.quantum.de
+# Subject: Bug report - closing down pipes which read from shell com
+# To: bug-gnu-utils@prep.ai.mit.edu
+# Date: Thu, 27 Feb 1997 23:19:16 +0100 (CET)
+# CC: arnold@gnu.ai.mit.edu
+#
+# Hello people,
+#
+# i think i found a bug or something mysterious behaviour in
+# gawk Version 3.0 patchlevel 0.
+#
+# I am running on linux 2.0.25 under bash.
+#
+# Could you please have a look at the following awk program
+# an let me please know, if this is what i expect it to,
+# namely a bug.
+#
+# ----------- cut here --------------------------------------------
+BEGIN {
+ # OS is linux 2.0.25
+ # shell is bash
+ # Gnu Awk (gawk) 3.0, patchlevel 0
+ # The command i typed on the shell was "gawk -f <this_prog> -"
+
+ #com = "cal 01 1997"
+ com = ("cat " SRCDIR "/pipeio2.in")
+
+ while ((com | getline fnam) > 0) {
+
+ com_tr = "echo " fnam " | tr [0-9]. ..........."
+ print "\'" com_tr "\'"
+
+ com_tr | getline nam
+ print nam
+
+ # please run that program and take a look at the
+ # output. I think this is what was expected.
+
+ # Then comment in the following 4 lines and see
+ # what happens. I expect the first pipe "com | getline"
+ # not to be close, but i think this is exactly what happens
+ # So, is this ok ?
+
+ if (close(com_tr) < 0) {
+ print ERRNO
+ break
+ }
+ }
+
+ close(com)
+ }
+# ----------- cut here --------------------------------------------
+#
+# There is another thing i do not understand.
+# Why doesn't the awk - command "close" reports an
+# error, if i would say close("abc") which i had never
+# openend ?
+#
+# Regards,
+# Ulrich Gvbel
+# --
+# /********************************************************\
+# * Ulrich Gvbel, goebel@quantum.de *
+# * Quantum Gesellschaft f|r Software mbH, Dortmund *
+# * phone : +49-231-9749-201 fax: +49-231-9749-3 *
+# * private: +49-231-803994 fax: +49-231-803994 *
+# \********************************************************/
diff --git a/contrib/awk/test/pipeio2.in b/contrib/awk/test/pipeio2.in
new file mode 100644
index 0000000..2652b0e
--- /dev/null
+++ b/contrib/awk/test/pipeio2.in
@@ -0,0 +1,8 @@
+ January 1997
+ S M Tu W Th F S
+ 1 2 3 4
+ 5 6 7 8 9 10 11
+12 13 14 15 16 17 18
+19 20 21 22 23 24 25
+26 27 28 29 30 31
+
diff --git a/contrib/awk/test/pipeio2.ok b/contrib/awk/test/pipeio2.ok
new file mode 100644
index 0000000..3f55c05
--- /dev/null
+++ b/contrib/awk/test/pipeio2.ok
@@ -0,0 +1,16 @@
+'echo January 1997 | tr [0-9]. ...........'
+January ....
+'echo S M Tu W Th F S | tr [0-9]. ...........'
+S M Tu W Th F S
+'echo 1 2 3 4 | tr [0-9]. ...........'
+. . . .
+'echo 5 6 7 8 9 10 11 | tr [0-9]. ...........'
+. . . . . .. ..
+'echo 12 13 14 15 16 17 18 | tr [0-9]. ...........'
+.. .. .. .. .. .. ..
+'echo 19 20 21 22 23 24 25 | tr [0-9]. ...........'
+.. .. .. .. .. .. ..
+'echo 26 27 28 29 30 31 | tr [0-9]. ...........'
+.. .. .. .. .. ..
+'echo | tr [0-9]. ...........'
+
diff --git a/contrib/awk/test/posix.awk b/contrib/awk/test/posix.awk
new file mode 100644
index 0000000..79474f3
--- /dev/null
+++ b/contrib/awk/test/posix.awk
@@ -0,0 +1,69 @@
+BEGIN {
+ a = "+2"; b = 2; c = "+2a"; d = "+2 "; e = " 2"
+
+ printf "Test #1: "
+ if (b == a) print "\"" a "\"" " compares as a number"
+ else print "\"" a "\"" " compares as a string"
+
+ printf "Test #2: "
+ if (b == c) print "\"" c "\"" " compares as a number"
+ else print "\"" c "\"" " compares as a string"
+
+ printf "Test #3: "
+ if (b == d) print "\"" d "\"" " compares as a number"
+ else print "\"" d "\"" " compares as a string"
+
+ printf "Test #4: "
+ if (b == e) print "\"" e "\"" " compares as a number"
+ else print "\"" e "\"" " compares as a string"
+
+ f = a + b + c + d + e
+ print "after addition"
+
+ printf "Test #5: "
+ if (b == a) print "\"" a "\"" " compares as a number"
+ else print "\"" a "\"" " compares as a string"
+
+ printf "Test #6: "
+ if (b == c) print "\"" c "\"" " compares as a number"
+ else print "\"" c "\"" " compares as a string"
+
+ printf "Test #7: "
+ if (b == d) print "\"" d "\"" " compares as a number"
+ else print "\"" d "\"" " compares as a string"
+
+ printf "Test #8: "
+ if (b == e) print "\"" e "\"" " compares as a number"
+ else print "\"" e "\"" " compares as a string"
+
+ printf "Test #9: "
+ if ("3e5" > "5") print "\"3e5\" > \"5\""
+ else print "\"3e5\" <= \"5\""
+
+ printf "Test #10: "
+ x = 32.14
+ y[x] = "test"
+ OFMT = "%e"
+ print y[x]
+
+ printf "Test #11: "
+ x = x + 0
+ print y[x]
+
+ printf "Test #12: "
+ OFMT="%f"
+ CONVFMT="%e"
+ print 1.5, 1.5 ""
+
+ printf "Test #13: "
+ if ( 1000000 "" == 1000001 "") print "match"
+ else print "nomatch"
+}
+{
+ printf "Test #14: "
+ FS = ":"
+ print $1
+ FS = ","
+ printf "Test #15: "
+ print $2
+}
diff --git a/contrib/awk/test/posix.ok b/contrib/awk/test/posix.ok
new file mode 100644
index 0000000..100b150
--- /dev/null
+++ b/contrib/awk/test/posix.ok
@@ -0,0 +1,16 @@
+Test #1: "+2" compares as a string
+Test #2: "+2a" compares as a string
+Test #3: "+2 " compares as a string
+Test #4: " 2" compares as a string
+after addition
+Test #5: "+2" compares as a string
+Test #6: "+2a" compares as a string
+Test #7: "+2 " compares as a string
+Test #8: " 2" compares as a string
+Test #9: "3e5" <= "5"
+Test #10: test
+Test #11: test
+Test #12: 1.500000 1.500000e+00
+Test #13: nomatch
+Test #14: 1:2,3
+Test #15: 4
diff --git a/contrib/awk/test/poundbang b/contrib/awk/test/poundbang
new file mode 100755
index 0000000..d60652e
--- /dev/null
+++ b/contrib/awk/test/poundbang
@@ -0,0 +1,3 @@
+#! /tmp/gawk -f
+ { ccount += length($0) }
+END { printf "average line length is %2.4f\n", ccount/NR}
diff --git a/contrib/awk/test/poundbang.ok b/contrib/awk/test/poundbang.ok
new file mode 100644
index 0000000..143e28d
--- /dev/null
+++ b/contrib/awk/test/poundbang.ok
@@ -0,0 +1 @@
+average line length is 32.6667
diff --git a/contrib/awk/test/prdupval.awk b/contrib/awk/test/prdupval.awk
new file mode 100644
index 0000000..32c67dc
--- /dev/null
+++ b/contrib/awk/test/prdupval.awk
@@ -0,0 +1 @@
+{ print NF, $NF, "abc" $NF }
diff --git a/contrib/awk/test/prdupval.in b/contrib/awk/test/prdupval.in
new file mode 100644
index 0000000..5626abf
--- /dev/null
+++ b/contrib/awk/test/prdupval.in
@@ -0,0 +1 @@
+one
diff --git a/contrib/awk/test/prdupval.ok b/contrib/awk/test/prdupval.ok
new file mode 100644
index 0000000..6253616
--- /dev/null
+++ b/contrib/awk/test/prdupval.ok
@@ -0,0 +1 @@
+1 one abcone
diff --git a/contrib/awk/test/prmarscl.awk b/contrib/awk/test/prmarscl.awk
new file mode 100644
index 0000000..3caf3d9
--- /dev/null
+++ b/contrib/awk/test/prmarscl.awk
@@ -0,0 +1,6 @@
+function test(a)
+{
+ print a[1]
+}
+
+BEGIN { j = 4; test(j) }
diff --git a/contrib/awk/test/prmarscl.ok b/contrib/awk/test/prmarscl.ok
new file mode 100644
index 0000000..b42cee6
--- /dev/null
+++ b/contrib/awk/test/prmarscl.ok
@@ -0,0 +1 @@
+gawk: prmarscl.awk:4: fatal: attempt to use scalar parameter 1 as an array
diff --git a/contrib/awk/test/prmreuse.awk b/contrib/awk/test/prmreuse.awk
new file mode 100644
index 0000000..37e06f5
--- /dev/null
+++ b/contrib/awk/test/prmreuse.awk
@@ -0,0 +1,14 @@
+# from Pat Rankin, rankin@eql.caltech.edu
+
+BEGIN { dummy(1); legit(); exit }
+
+function dummy(arg)
+{
+ return arg
+}
+
+function legit( scratch)
+{
+ split("1 2 3", scratch)
+ return ""
+}
diff --git a/contrib/awk/test/prmreuse.ok b/contrib/awk/test/prmreuse.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/prmreuse.ok
diff --git a/contrib/awk/test/prt1eval.awk b/contrib/awk/test/prt1eval.awk
new file mode 100644
index 0000000..4ecd368
--- /dev/null
+++ b/contrib/awk/test/prt1eval.awk
@@ -0,0 +1,6 @@
+function tst () {
+ sum += 1
+ return sum
+}
+
+BEGIN { OFMT = "%.0f" ; print tst() }
diff --git a/contrib/awk/test/prt1eval.ok b/contrib/awk/test/prt1eval.ok
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/contrib/awk/test/prt1eval.ok
@@ -0,0 +1 @@
+1
diff --git a/contrib/awk/test/prtoeval.awk b/contrib/awk/test/prtoeval.awk
new file mode 100644
index 0000000..77880d8
--- /dev/null
+++ b/contrib/awk/test/prtoeval.awk
@@ -0,0 +1,4 @@
+function returns_a_str() { print "<in function>" ; return "'A STRING'" }
+BEGIN {
+ print "partial line:", returns_a_str()
+}
diff --git a/contrib/awk/test/prtoeval.ok b/contrib/awk/test/prtoeval.ok
new file mode 100644
index 0000000..13e122b
--- /dev/null
+++ b/contrib/awk/test/prtoeval.ok
@@ -0,0 +1,2 @@
+<in function>
+partial line: 'A STRING'
diff --git a/contrib/awk/test/rand.awk b/contrib/awk/test/rand.awk
new file mode 100644
index 0000000..6378f3d
--- /dev/null
+++ b/contrib/awk/test/rand.awk
@@ -0,0 +1,6 @@
+BEGIN {
+ srand(2)
+ for (i = 0; i < 19; i++)
+ printf "%3d ", (1 + int(100 * rand()))
+ print ""
+}
diff --git a/contrib/awk/test/rand.ok b/contrib/awk/test/rand.ok
new file mode 100644
index 0000000..b6d7554
--- /dev/null
+++ b/contrib/awk/test/rand.ok
@@ -0,0 +1 @@
+ 27 17 86 27 22 53 61 11 33 48 51 97 99 35 20 27 62 100 32
diff --git a/contrib/awk/test/reg/exp-eq.awk b/contrib/awk/test/reg/exp-eq.awk
new file mode 100644
index 0000000..fed6a69
--- /dev/null
+++ b/contrib/awk/test/reg/exp-eq.awk
@@ -0,0 +1 @@
+{ $0 ^= 3 ; print $1}
diff --git a/contrib/awk/test/reg/exp-eq.good b/contrib/awk/test/reg/exp-eq.good
new file mode 100644
index 0000000..d8d59aa
--- /dev/null
+++ b/contrib/awk/test/reg/exp-eq.good
@@ -0,0 +1,3 @@
+1
+8
+27
diff --git a/contrib/awk/test/reg/exp-eq.in b/contrib/awk/test/reg/exp-eq.in
new file mode 100644
index 0000000..01e79c3
--- /dev/null
+++ b/contrib/awk/test/reg/exp-eq.in
@@ -0,0 +1,3 @@
+1
+2
+3
diff --git a/contrib/awk/test/reg/exp.awk b/contrib/awk/test/reg/exp.awk
new file mode 100644
index 0000000..4e707f8
--- /dev/null
+++ b/contrib/awk/test/reg/exp.awk
@@ -0,0 +1 @@
+BEGIN { print exp(0), exp(1000000), exp(0.5) }
diff --git a/contrib/awk/test/reg/exp.good b/contrib/awk/test/reg/exp.good
new file mode 100644
index 0000000..07b8853
--- /dev/null
+++ b/contrib/awk/test/reg/exp.good
@@ -0,0 +1,2 @@
+1 gawk: reg/exp.awk:1: warning: exp argument 1e+06 is out of range
+Inf 1.64872
diff --git a/contrib/awk/test/reg/exp.in b/contrib/awk/test/reg/exp.in
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/reg/exp.in
diff --git a/contrib/awk/test/reg/func.awk b/contrib/awk/test/reg/func.awk
new file mode 100644
index 0000000..e32cd4e
--- /dev/null
+++ b/contrib/awk/test/reg/func.awk
@@ -0,0 +1 @@
+BEGIN { print dummy(1) }
diff --git a/contrib/awk/test/reg/func.good b/contrib/awk/test/reg/func.good
new file mode 100644
index 0000000..d3c7c71
--- /dev/null
+++ b/contrib/awk/test/reg/func.good
@@ -0,0 +1 @@
+gawk: reg/func.awk:1: fatal: function `dummy' not defined
diff --git a/contrib/awk/test/reg/func.in b/contrib/awk/test/reg/func.in
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/reg/func.in
diff --git a/contrib/awk/test/reg/func2.awk b/contrib/awk/test/reg/func2.awk
new file mode 100644
index 0000000..2abf2c1
--- /dev/null
+++ b/contrib/awk/test/reg/func2.awk
@@ -0,0 +1,2 @@
+function dummy() { ; }
+BEGIN { print dummy (1) }
diff --git a/contrib/awk/test/reg/func2.good b/contrib/awk/test/reg/func2.good
new file mode 100644
index 0000000..ae87bc3
--- /dev/null
+++ b/contrib/awk/test/reg/func2.good
@@ -0,0 +1,2 @@
+gawk: reg/func2.awk:2: fatal: function `dummy' called with space between name and (,
+or used in other expression context
diff --git a/contrib/awk/test/reg/func2.in b/contrib/awk/test/reg/func2.in
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/reg/func2.in
diff --git a/contrib/awk/test/reg/log.awk b/contrib/awk/test/reg/log.awk
new file mode 100644
index 0000000..bcae90b
--- /dev/null
+++ b/contrib/awk/test/reg/log.awk
@@ -0,0 +1 @@
+BEGIN { print log(0), log(-1), log(100) }
diff --git a/contrib/awk/test/reg/log.good b/contrib/awk/test/reg/log.good
new file mode 100644
index 0000000..857ab77
--- /dev/null
+++ b/contrib/awk/test/reg/log.good
@@ -0,0 +1,4 @@
+log: SING error
+-Inf gawk: reg/log.awk:1: warning: log called with negative argument -1
+log: DOMAIN error
+NaN 4.60517
diff --git a/contrib/awk/test/reg/log.in b/contrib/awk/test/reg/log.in
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/reg/log.in
diff --git a/contrib/awk/test/regtest b/contrib/awk/test/regtest
new file mode 100755
index 0000000..72b0dbf
--- /dev/null
+++ b/contrib/awk/test/regtest
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+case "$AWK" in
+"") AWK=../gawk ;;
+esac
+#AWK=${AWK:-../gawk}
+
+for i in reg/*.awk
+do
+ it=`basename $i .awk`
+ $AWK -f $i <reg/$it.in >reg/$it.out 2>&1
+ if cmp -s reg/$it.out reg/$it.good
+ then
+ rm -f reg/$it.out
+ else
+ echo "regtest: $it fails"
+ fi
+done
diff --git a/contrib/awk/test/reindops.awk b/contrib/awk/test/reindops.awk
new file mode 100644
index 0000000..13ae657
--- /dev/null
+++ b/contrib/awk/test/reindops.awk
@@ -0,0 +1,6 @@
+{
+ if ($1 !~ /^+[2-9]/)
+ print "gawk is broken"
+ else
+ print "gawk is ok"
+}
diff --git a/contrib/awk/test/reindops.in b/contrib/awk/test/reindops.in
new file mode 100644
index 0000000..b1e5435
--- /dev/null
+++ b/contrib/awk/test/reindops.in
@@ -0,0 +1 @@
++44 123 456
diff --git a/contrib/awk/test/reindops.ok b/contrib/awk/test/reindops.ok
new file mode 100644
index 0000000..f9605fd
--- /dev/null
+++ b/contrib/awk/test/reindops.ok
@@ -0,0 +1 @@
+gawk is ok
diff --git a/contrib/awk/test/reint.awk b/contrib/awk/test/reint.awk
new file mode 100644
index 0000000..add0f2a
--- /dev/null
+++ b/contrib/awk/test/reint.awk
@@ -0,0 +1 @@
+{ print match($0, /a{3}/) }
diff --git a/contrib/awk/test/reint.in b/contrib/awk/test/reint.in
new file mode 100644
index 0000000..43caa2a
--- /dev/null
+++ b/contrib/awk/test/reint.in
@@ -0,0 +1 @@
+match this: aaa
diff --git a/contrib/awk/test/reint.ok b/contrib/awk/test/reint.ok
new file mode 100644
index 0000000..b1bd38b
--- /dev/null
+++ b/contrib/awk/test/reint.ok
@@ -0,0 +1 @@
+13
diff --git a/contrib/awk/test/reparse.awk b/contrib/awk/test/reparse.awk
new file mode 100644
index 0000000..433ecbb
--- /dev/null
+++ b/contrib/awk/test/reparse.awk
@@ -0,0 +1,7 @@
+{
+ gsub(/x/, " ")
+ $0 = $0
+ print $1
+ print $0
+ print $1, $2, $3
+}
diff --git a/contrib/awk/test/reparse.in b/contrib/awk/test/reparse.in
new file mode 100644
index 0000000..6f31cde
--- /dev/null
+++ b/contrib/awk/test/reparse.in
@@ -0,0 +1 @@
+1 axbxc 2
diff --git a/contrib/awk/test/reparse.ok b/contrib/awk/test/reparse.ok
new file mode 100644
index 0000000..6bdfacf
--- /dev/null
+++ b/contrib/awk/test/reparse.ok
@@ -0,0 +1,3 @@
+1
+1 a b c 2
+1 a b
diff --git a/contrib/awk/test/resplit.ok b/contrib/awk/test/resplit.ok
new file mode 100644
index 0000000..6178079
--- /dev/null
+++ b/contrib/awk/test/resplit.ok
@@ -0,0 +1 @@
+b
diff --git a/contrib/awk/test/rs.in b/contrib/awk/test/rs.in
new file mode 100644
index 0000000..edef835
--- /dev/null
+++ b/contrib/awk/test/rs.in
@@ -0,0 +1,15 @@
+
+
+a
+b
+
+
+c d
+
+
+
+e
+
+
+
+
diff --git a/contrib/awk/test/rs.ok b/contrib/awk/test/rs.ok
new file mode 100644
index 0000000..9dd6bd3
--- /dev/null
+++ b/contrib/awk/test/rs.ok
@@ -0,0 +1,3 @@
+a b
+c d
+e
diff --git a/contrib/awk/test/rswhite.awk b/contrib/awk/test/rswhite.awk
new file mode 100644
index 0000000..0048765
--- /dev/null
+++ b/contrib/awk/test/rswhite.awk
@@ -0,0 +1,2 @@
+BEGIN { RS = "" }
+{ printf("<%s>\n", $0) }
diff --git a/contrib/awk/test/rswhite.in b/contrib/awk/test/rswhite.in
new file mode 100644
index 0000000..39f7756
--- /dev/null
+++ b/contrib/awk/test/rswhite.in
@@ -0,0 +1,2 @@
+ a b
+c d
diff --git a/contrib/awk/test/rswhite.ok b/contrib/awk/test/rswhite.ok
new file mode 100644
index 0000000..a029e47
--- /dev/null
+++ b/contrib/awk/test/rswhite.ok
@@ -0,0 +1,2 @@
+< a b
+c d>
diff --git a/contrib/awk/test/sclforin.awk b/contrib/awk/test/sclforin.awk
new file mode 100644
index 0000000..335e854
--- /dev/null
+++ b/contrib/awk/test/sclforin.awk
@@ -0,0 +1 @@
+BEGIN { j = 4; for (i in j) print j[i] }
diff --git a/contrib/awk/test/sclforin.ok b/contrib/awk/test/sclforin.ok
new file mode 100644
index 0000000..d87fa61
--- /dev/null
+++ b/contrib/awk/test/sclforin.ok
@@ -0,0 +1 @@
+gawk: sclforin.awk:1: fatal: attempt to use scalar as array
diff --git a/contrib/awk/test/sclifin.awk b/contrib/awk/test/sclifin.awk
new file mode 100644
index 0000000..64f5d0d
--- /dev/null
+++ b/contrib/awk/test/sclifin.awk
@@ -0,0 +1,7 @@
+BEGIN {
+ j = 4
+ if ("foo" in j)
+ print "ouch"
+ else
+ print "ok"
+}
diff --git a/contrib/awk/test/sclifin.ok b/contrib/awk/test/sclifin.ok
new file mode 100644
index 0000000..717f836
--- /dev/null
+++ b/contrib/awk/test/sclifin.ok
@@ -0,0 +1 @@
+gawk: sclifin.awk:7: fatal: attempt to use scalar as array
diff --git a/contrib/awk/test/splitargv.awk b/contrib/awk/test/splitargv.awk
new file mode 100644
index 0000000..10886ef
--- /dev/null
+++ b/contrib/awk/test/splitargv.awk
@@ -0,0 +1,7 @@
+BEGIN {
+ for (idx = 1; idx < ARGC; idx++)
+ split(ARGV[idx], temp, ".");
+ }
+ {
+ print $0;
+ }
diff --git a/contrib/awk/test/splitargv.in b/contrib/awk/test/splitargv.in
new file mode 100644
index 0000000..10886ef
--- /dev/null
+++ b/contrib/awk/test/splitargv.in
@@ -0,0 +1,7 @@
+BEGIN {
+ for (idx = 1; idx < ARGC; idx++)
+ split(ARGV[idx], temp, ".");
+ }
+ {
+ print $0;
+ }
diff --git a/contrib/awk/test/splitargv.ok b/contrib/awk/test/splitargv.ok
new file mode 100644
index 0000000..10886ef
--- /dev/null
+++ b/contrib/awk/test/splitargv.ok
@@ -0,0 +1,7 @@
+BEGIN {
+ for (idx = 1; idx < ARGC; idx++)
+ split(ARGV[idx], temp, ".");
+ }
+ {
+ print $0;
+ }
diff --git a/contrib/awk/test/splitvar.awk b/contrib/awk/test/splitvar.awk
new file mode 100644
index 0000000..9e1ac79
--- /dev/null
+++ b/contrib/awk/test/splitvar.awk
@@ -0,0 +1,5 @@
+{
+ sep = "=+"
+ n = split($0, a, sep)
+ print n
+}
diff --git a/contrib/awk/test/splitvar.in b/contrib/awk/test/splitvar.in
new file mode 100644
index 0000000..85be8ee
--- /dev/null
+++ b/contrib/awk/test/splitvar.in
@@ -0,0 +1 @@
+Here===Is=Some=====Data
diff --git a/contrib/awk/test/splitvar.ok b/contrib/awk/test/splitvar.ok
new file mode 100644
index 0000000..b8626c4
--- /dev/null
+++ b/contrib/awk/test/splitvar.ok
@@ -0,0 +1 @@
+4
diff --git a/contrib/awk/test/splitwht.awk b/contrib/awk/test/splitwht.awk
new file mode 100644
index 0000000..6163d72
--- /dev/null
+++ b/contrib/awk/test/splitwht.awk
@@ -0,0 +1,7 @@
+BEGIN {
+ str = "a b\t\tc d"
+ n = split(str, a, " ")
+ print n
+ m = split(str, b, / /)
+ print m
+}
diff --git a/contrib/awk/test/splitwht.ok b/contrib/awk/test/splitwht.ok
new file mode 100644
index 0000000..61c83cb
--- /dev/null
+++ b/contrib/awk/test/splitwht.ok
@@ -0,0 +1,2 @@
+4
+5
diff --git a/contrib/awk/test/sprintfc.awk b/contrib/awk/test/sprintfc.awk
new file mode 100644
index 0000000..ee1e5a7
--- /dev/null
+++ b/contrib/awk/test/sprintfc.awk
@@ -0,0 +1 @@
+{ print sprintf("%c", $1), $1 }
diff --git a/contrib/awk/test/sprintfc.in b/contrib/awk/test/sprintfc.in
new file mode 100644
index 0000000..4602d28
--- /dev/null
+++ b/contrib/awk/test/sprintfc.in
@@ -0,0 +1,3 @@
+65
+66
+foo
diff --git a/contrib/awk/test/sprintfc.ok b/contrib/awk/test/sprintfc.ok
new file mode 100644
index 0000000..33769a8
--- /dev/null
+++ b/contrib/awk/test/sprintfc.ok
@@ -0,0 +1,3 @@
+A 65
+B 66
+f foo
diff --git a/contrib/awk/test/strftlng.awk b/contrib/awk/test/strftlng.awk
new file mode 100644
index 0000000..0ef8195
--- /dev/null
+++ b/contrib/awk/test/strftlng.awk
@@ -0,0 +1,11 @@
+# test file from Paul Eggert, eggert@twinsun.com
+# modified for portability (%c doesn't cut it)
+
+BEGIN {
+ BUFSIZ = 1024
+ simpleformat = format = "%m/%d/%y %H:%M:%S\n"
+ clen = length(strftime(format, 0))
+ for (i = 1; i < BUFSIZ / clen + 1; i++)
+ format = format simpleformat
+ printf "%s", strftime(format, 0)
+}
diff --git a/contrib/awk/test/strftlng.ok b/contrib/awk/test/strftlng.ok
new file mode 100644
index 0000000..3008aa2
--- /dev/null
+++ b/contrib/awk/test/strftlng.ok
@@ -0,0 +1,58 @@
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
+01/01/70 00:00:00
diff --git a/contrib/awk/test/substr.awk b/contrib/awk/test/substr.awk
new file mode 100644
index 0000000..6016369
--- /dev/null
+++ b/contrib/awk/test/substr.awk
@@ -0,0 +1,14 @@
+BEGIN {
+ x = "A"
+ printf("%-39s\n", substr(x,1,39))
+ print substr("abcdef", 0, 2)
+ print substr("abcdef", 2.3, 2)
+ print substr("abcdef", -1, 2)
+ print substr("abcdef", 1, 0)
+ print substr("abcdef", 1, -3)
+ print substr("abcdef", 1, 2.3)
+ print substr("", 1, 2)
+ print substr("abcdef", 5, 5)
+ print substr("abcdef", 7, 2)
+ exit (0)
+}
diff --git a/contrib/awk/test/substr.ok b/contrib/awk/test/substr.ok
new file mode 100644
index 0000000..be6889d
--- /dev/null
+++ b/contrib/awk/test/substr.ok
@@ -0,0 +1,10 @@
+A
+ab
+bc
+ab
+
+
+ab
+
+ef
+
diff --git a/contrib/awk/test/swaplns.awk b/contrib/awk/test/swaplns.awk
new file mode 100644
index 0000000..6bf2240
--- /dev/null
+++ b/contrib/awk/test/swaplns.awk
@@ -0,0 +1,7 @@
+{
+ if ((getline tmp) > 0) {
+ print tmp
+ print
+ } else
+ print
+}
diff --git a/contrib/awk/test/swaplns.in b/contrib/awk/test/swaplns.in
new file mode 100644
index 0000000..71fb162
--- /dev/null
+++ b/contrib/awk/test/swaplns.in
@@ -0,0 +1,9 @@
+This directory contains some examples/test-cases for different
+features of gawk - mostly not present in an old awk. Some are from
+"The GAWK Manual", some are original, and some are mixture of the two.
+Read header comments before attempting to use. Have fun and remember
+that program which consists only of BEGIN block does not need an input
+file.
+
+ --mj
+
diff --git a/contrib/awk/test/swaplns.ok b/contrib/awk/test/swaplns.ok
new file mode 100644
index 0000000..d38b7ca
--- /dev/null
+++ b/contrib/awk/test/swaplns.ok
@@ -0,0 +1,9 @@
+features of gawk - mostly not present in an old awk. Some are from
+This directory contains some examples/test-cases for different
+Read header comments before attempting to use. Have fun and remember
+"The GAWK Manual", some are original, and some are mixture of the two.
+file.
+that program which consists only of BEGIN block does not need an input
+ --mj
+
+
diff --git a/contrib/awk/test/tradanch.awk b/contrib/awk/test/tradanch.awk
new file mode 100644
index 0000000..0cd45d1
--- /dev/null
+++ b/contrib/awk/test/tradanch.awk
@@ -0,0 +1,2 @@
+/foo^bar/ # '^' in the middle of the regexp; tradanch.ok is empty, so no input line is expected to match
+/foo$bar/ # '$' in the middle of the regexp; likewise expected to print nothing
diff --git a/contrib/awk/test/tradanch.in b/contrib/awk/test/tradanch.in
new file mode 100644
index 0000000..e5c8a09
--- /dev/null
+++ b/contrib/awk/test/tradanch.in
@@ -0,0 +1,2 @@
+foo^bar
+foo$bar
diff --git a/contrib/awk/test/tradanch.ok b/contrib/awk/test/tradanch.ok
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/contrib/awk/test/tradanch.ok
diff --git a/contrib/awk/test/tweakfld.awk b/contrib/awk/test/tweakfld.awk
new file mode 100644
index 0000000..e7b538f
--- /dev/null
+++ b/contrib/awk/test/tweakfld.awk
@@ -0,0 +1,296 @@
+# To: bug-gnu-utils@prep.ai.mit.edu
+# Cc: arnold@gnu.ai.mit.edu
+# Date: Mon, 20 Nov 1995 11:39:29 -0500
+# From: "R. Hank Donnelly" <emory!head-cfa.harvard.edu!donnelly>
+#
+# Operating system: Linux1.2.13 (Slackware distrib)
+# GAWK version: 2.15 (?)
+# compiler: GCC (?)
+#
+# The following enclosed script does not want to fully process the input data
+# file. It correctly executes the operations on the first record, and then dies
+# on the second one. My true data file is much longer but this is
+# representative and it does fail on a file even as short as this one.
+# The failure appears to occur in the declared function add2output. Between the
+# steps of incrementing NF by one and setting $NF to the passed variable
+# the passed variable appears to vanish (i.e. NF does go from 68 to 69
+# and before incrementing it "variable" equals what it should but after
+# "variable" has no value at all.)
+#
+# The scripts have been developed using nawk on a Sun (where they run fine)
+# I have tried gawk there but get a different crash which I have not yet traced
+# down. Ideally I would like to keep the script the same so that it would run
+# on either gawk or nawk (that way I can step back and forth between laptop and
+# workstation.
+#
+# Any ideas why the laptop installation is having problems?
+# Hank
+#
+#
+# #!/usr/bin/gawk -f
+
+BEGIN { # one-time setup: separators, constants, input column numbers, mnemonic table, fixed date
+ # set a few values
+ FS = "\t"
+ OFS = "\t"
+ pi = atan2(0, -1) # atan2(0, -1) evaluates to pi
+# distance from HRMA to focal plane in mm
+ fullradius = 10260.54
+
+ # set locations of parameters on input line
+ nf_nrg = 1
+ nf_order = 3
+ nf_item = 4
+ nf_suite = 5
+ nf_grating = 8
+ nf_shutter = 9
+ nf_type = 13
+ nf_src = 14
+ nf_target = 15
+ nf_voltage = 16
+ nf_flux = 17
+ nf_filt1 = 20
+ nf_filt1_th = 21
+ nf_filt2 = 22
+ nf_filt2_th = 23
+ nf_bnd = 24
+ nf_hrma_polar = 27
+ nf_hrma_az = 28
+ nf_detector = 30
+ nf_acis_read = 32
+ nf_acis_proc = 33
+ nf_acis_frame = 34
+ nf_hxda_aplist = 36
+ nf_hxda_y_range = 37
+ nf_hxda_z_range = 38
+ nf_hxda_y_step = 39
+ nf_hxda_z_step = 40
+ nf_sim_z = 41
+ nf_fam_polar = 43
+ nf_fam_az = 44
+ nf_fam_dither_type = 45
+ nf_mono_init = 51
+ nf_mono_range = 52
+ nf_mono_step = 53
+ nf_defocus = 54
+ nf_acis_temp = 55
+ nf_tight = 59
+ nf_offset_y = 64
+ nf_offset_z = 65
+
+ while( getline < "xrcf_mnemonics.dat" > 0 ) { # each line read from the file maps $1 to $2 in mnemonic[]
+ mnemonic[$1] = $2
+ }
+
+# "date" | getline date_line
+# ADR: use a fixed date so that testing will work
+ date_line = "Sun Mar 10 23:00:27 EST 1996"
+ split(date_line, in_date, " ")
+ out_date = in_date[2] " " in_date[3] ", " in_date[6] # yields "Mar 10, 1996" for the fixed date above
+}
+
+function add2output( variable ) { # append variable to the current record as a new last field
+#print("hi1") >> "debug"
+ NF++ # grow the record by one field; the bug report in the file header says variable lost its value here
+#print("hi2") >> "debug"
+ $NF = variable # store the value in the field just created
+#print("hi3") >> "debug"
+}
+
+function error( ekey, message ) { # append a two-line diagnostic (record number NR, ekey, message) to "errors.cleanup"
+ print "Error at input line " NR ", anode " ekey >> "errors.cleanup"
+ print " " message "." >> "errors.cleanup"
+}
+
+function hxda_na() { # overwrite the five nf_hxda_* fields of the current record with "N/A"
+ $nf_hxda_aplist = $nf_hxda_y_range = $nf_hxda_z_range = "N/A"
+ $nf_hxda_y_step = $nf_hxda_z_step = "N/A"
+}
+
+function acis_na() { # overwrite the four nf_acis_* fields of the current record with "N/A"
+ $nf_acis_read = $nf_acis_proc = $nf_acis_frame = $nf_acis_temp = "N/A"
+}
+
+function hrc_na() { # no-op: the body holds only a commented-out debug print
+# print ("hi") >> "debug"
+}
+
+function fpsi_na() { # run acis_na() and hrc_na(), then overwrite the nf_sim_z and nf_fam_* fields with "N/A"
+ acis_na()
+ hrc_na()
+ $nf_sim_z = $nf_fam_polar = $nf_fam_az = $nf_fam_dither_type = "N/A"
+}
+
+function mono_na() { # overwrite the three nf_mono_* fields of the current record with "N/A"
+ $nf_mono_init = $nf_mono_range = $nf_mono_step = "N/A"
+}
+
+# this gives the pitch and yaw of the HRMA and FAM
+# positive pitch is facing the source "looking down"
+# positive yaw is looking left
+# 0 az is north 90 is up
+# this also adds in the FAM X,Y,Z positions
+
+function polaz2yawpitch(polar, az) { # appends 5 fields via add2output(): two angle terms, then three FAM values or "N/A"; reads globals config, defocus, offset_y, offset_z
+ theta = az * pi / 180 # presumably degrees -> radians; theta, phi and radius are not parameters, so they are globals
+ phi = polar * pi / 180 / 60 # the extra /60 suggests polar is in arcminutes - TODO confirm
+
+
+ if( polar == 0 ) { # on-axis: both angle terms are a literal 0
+ add2output( 0 )
+ add2output( 0 )
+ } else {
+ if(az == 0 || az == 180) # literal 0 instead of the sine term, presumably to avoid floating-point residue
+ add2output( 0 )
+ else
+ add2output( - polar * sin(theta) )
+
+
+# x = cos (phi)
+# y = sin (phi) * cos (theta)
+# add2output( atan2(y,x)*180 / pi * 60 )
+
+ if(az == 90 || az ==270 ) # literal 0 instead of the cosine term
+ add2output( 0 )
+ else
+ add2output( - polar * cos(theta) )
+
+ }
+# x = cos (phi)
+# z= sin (phi) * sin (theta)
+# add2output( atan2(z,x)*180 / pi * 60 )
+
+ if(config !~ /HXDA/) { # FAM values are computed only when config does not contain "HXDA"
+# negative values of defocus move us farther from the source thus
+# increasing radius
+ radius = fullradius - defocus
+
+# FAM_x; FAM_y; FAM_z
+ if((offset_y == 0) && (offset_z == 0)){
+ add2output( fullradius - radius * cos (phi) )
+
+ if (az == 90 || az ==270)
+ add2output( 0 )
+ else
+ add2output( radius * sin (phi) * cos (theta) )
+
+ if (az == 0 || az == 180)
+ add2output( 0 )
+ else
+ add2output( - radius * sin (phi) * sin (theta) )
+ } else { # non-zero offsets: pass defocus and the offsets through unchanged
+# ******* THIS SEGMENT OF CODE IS NOT MATHEMATICALLY CORRECT FOR ****
+# OFF AXIS ANGLES AND IS SUPPLIED AS A WORKAROUND SINCE IT WILL
+# PROBABLY ONLY BE USED ON AXIS.
+ add2output( defocus )
+ add2output( offset_y )
+ add2output( offset_z )
+ }
+
+ } else { # config contains "HXDA": the three FAM fields are "N/A"
+ add2output( "N/A" )
+ add2output( "N/A" )
+ add2output( "N/A" )
+ }
+}
+
+# set TIGHT/LOOSE to N/A if it is not one of the two allowed values
+function tight_na() { # force field $nf_tight to "N/A" unless it contains TIGHT or LOOSE
+ if( $nf_tight !~ /TIGHT|LOOSE/ ) {
+ $nf_tight = "N/A" # must be "=": "==" would only compare and discard the result, leaving the field unchanged
+ }
+}
+
+# this entry is used to give certain entries names
+{ # for every record, copy selected input fields into named globals; this rule runs before the rule that rewrites fields
+ type = $nf_type
+ item = $nf_item
+ suite = $nf_suite
+ order = $nf_order
+ detector = $nf_detector
+ grating = $nf_grating
+ offset_y= $nf_offset_y
+ offset_z= $nf_offset_z
+ bnd = $nf_bnd
+ defocus = $nf_defocus
+}
+
+{ # main per-record rule: derive config, blank out inapplicable fields, append computed fields, rewrite BND, print
+ # make configuration parameter
+ # as well as setting configuration-dependent N/A values
+
+ if( $nf_bnd ~ "SCAN" ) {
+ # BND is scanning beam
+ config = "BND"
+ hxda_na()
+ fpsi_na()
+ } else {
+ if( grating == "NONE" ) {
+ config = "HRMA"
+ } else {
+ if( grating == "HETG" ) {
+ if( order != "Both" ) { # replace the part of the shutter field before its first "," with order
+ $nf_shutter = order substr($nf_shutter, \
+ index($nf_shutter, ",") )
+ }
+ } else {
+ order = "N/A"
+ }
+ config = "HRMA/" grating
+ }
+
+ if( detector ~ /ACIS|HRC/ ) {
+ detsys = detector
+ nsub = sub("-", ",", detsys) # replace the first "-" in detsys with ","; nsub is not read anywhere in this script
+ config = config "/" detsys
+ hxda_na()
+ } else {
+ config = config "/HXDA"
+ fpsi_na()
+ if( detector == "HSI" ) {
+ hxda_na()
+ }
+ }
+ }
+
+ add2output( config )
+
+ if( $nf_src ~ /EIPS|Penning/ ) mono_na()
+
+ if( $nf_src == "Penning" ) $nf_voltage = "N/A"
+
+ itm = sprintf("%03d", item) # zero-pad item to three digits
+
+ if(config in mnemonic) {
+ if( type in mnemonic ) {
+ ID = mnemonic[config] "-" mnemonic[type] "-" suite "." itm
+ add2output( ID )
+ } else { # no ID field is appended in this case
+ error(type, "measurement type not in list")
+ }
+ } else { # no ID field is appended in this case
+ error(config, "measurement configuration not in list")
+ }
+
+ # add date to output line
+ add2output( out_date )
+
+ # Convert HRMA polar and azimuthal angles to yaw and pitch
+ polaz2yawpitch($nf_hrma_polar, $nf_hrma_az)
+
+ # set TIGHT/LOOSE to N/A if it is not one of the two allowed values
+ tight_na()
+
+ # compute number of HXDA apertures
+ if( config ~ /HXDA/ && $nf_hxda_aplist != "N/A")
+ add2output( split( $nf_hxda_aplist, dummy, "," ) ) # split() returns the number of comma-separated items
+ else
+ add2output( "N/A" )
+
+ # make sure the BND value is properly set
+ if($nf_bnd == "FIXED" && detector ~ /ACIS/)
+ $nf_bnd =bnd"-SYNC"
+ else
+ $nf_bnd = bnd"-FREE"
+ print # emit the rewritten record; fields are joined with OFS because fields were assigned
+}
diff --git a/contrib/awk/test/tweakfld.in b/contrib/awk/test/tweakfld.in
new file mode 100644
index 0000000..e27a9dde
--- /dev/null
+++ b/contrib/awk/test/tweakfld.in
@@ -0,0 +1,3 @@
+0.277 N/A N/A 1 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS C-Ka 1.108 0.13484 N/A N/A C8H8 10.32 C8H8 20.64 FIXED 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 44.7175 44.7175 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate
+1.486 N/A N/A 2 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Al-Ka 4.458 0.642119 N/A N/A Al 18.38 Al 36.76 FIXED 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate
+4.51 N/A N/A 3 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Ti-Ka 22.55 3.02894 N/A N/A Ti 40.6 N/A N/A FIXED 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate
diff --git a/contrib/awk/test/tweakfld.ok b/contrib/awk/test/tweakfld.ok
new file mode 100644
index 0000000..3c4d894
--- /dev/null
+++ b/contrib/awk/test/tweakfld.ok
@@ -0,0 +1,3 @@
+0.277 N/A N/A 1 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS C-Ka 1.108 0.13484 N/A N/A C8H8 10.32 C8H8 20.64 FIXED-FREE 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 44.7175 44.7175 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate HRMA/HRC,I Mar 10, 1996 0 0 0 0 0 N/A
+1.486 N/A N/A 2 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Al-Ka 4.458 0.642119 N/A N/A Al 18.38 Al 36.76 FIXED-FREE 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate HRMA/HRC,I Mar 10, 1996 0 0 0 0 0 N/A
+4.51 N/A N/A 3 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Ti-Ka 22.55 3.02894 N/A N/A Ti 40.6 N/A N/A FIXED-FREE 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate HRMA/HRC,I Mar 10, 1996 0 0 0 0 0 N/A
diff --git a/contrib/awk/version.c b/contrib/awk/version.c
new file mode 100644
index 0000000..e84bb7e
--- /dev/null
+++ b/contrib/awk/version.c
@@ -0,0 +1,50 @@
+char *version_string = "@(#)GNU Awk 3.0"; /* NOTE(review): leading "@(#)" is presumably the what(1) identification marker - confirm */
+
+/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead
+ of the Right Hand Side */
+
+/* 1.03 Fixed split() to treat strings of space and tab as FS if
+ the split char is ' '.
+
+ Added -v option to print version number
+
+ Fixed bug that caused rounding when printing large numbers */
+
+/* 2.00beta Incorporated the functionality of the "new" awk as described
+ in the book (reference not handy). Extensively tested, but no
+ doubt still buggy. Badly needs tuning and cleanup, in
+ particular in memory management which is currently almost
+ non-existent. */
+
+/* 2.01 JF: Modified to compile under GCC, and fixed a few
+ bugs while I was at it. I hope I didn't add any more.
+ I modified parse.y to reduce the number of reduce/reduce
+ conflicts. There are still a few left. */
+
+/* 2.02 Fixed JF's bugs; improved memory management, still needs
+ lots of work. */
+
+/* 2.10 Major grammar rework and lots of bug fixes from David.
+ Major changes for performance enhancements from David.
+ A number of minor bug fixes and new features from Arnold.
+ Changes for MSDOS from Conrad Kwok and Scott Garfinkle.
+ The gawk.texinfo and info files included! */
+
+/* 2.11 Bug fix release to 2.10. Lots of changes for portability,
+ speed, and configurability. */
+
+/* 2.12 Lots of changes for portability, speed, and configurability.
+ Several bugs fixed. POSIX compliance. Removal of last set
+ of hard-wired limits. Atari and VMS ports added. */
+
+/* 2.13 Public release of 2.12 */
+
+/* 2.14 Mostly bug fixes. */
+
+/* 2.15 Bug fixes plus intermixing of command-line source and files,
+ GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files.
+ `delete array'. OS/2 port added. */
+
+/* 3.0 RS as regexp, RT variable, FS = "", fflush builtin, posix
+ regexps, IGNORECASE applies to all comparison, autoconf, source
+ code cleanup. See the NEWS file. */
OpenPOWER on IntegriCloud