From a46c41193ff2573a4c910e19b570e9c253e714a1 Mon Sep 17 00:00:00 2001 From: jraynard Date: Tue, 14 Oct 1997 18:17:11 +0000 Subject: Virgin import of GNU awk 3.0.3 --- contrib/awk/ACKNOWLEDGMENT | 34 + contrib/awk/COPYING | 340 + contrib/awk/ChangeLog | 826 ++ contrib/awk/FREEBSD-upgrade | 60 + contrib/awk/FUTURES | 108 + contrib/awk/INSTALL | 181 + contrib/awk/LIMITATIONS | 16 + contrib/awk/NEWS | 1802 ++++ contrib/awk/PORTS | 36 + contrib/awk/POSIX.STD | 109 + contrib/awk/PROBLEMS | 10 + contrib/awk/README | 94 + contrib/awk/README_d/README.FIRST | 21 + contrib/awk/acconfig.h | 36 + contrib/awk/array.c | 526 + contrib/awk/awk.h | 882 ++ contrib/awk/awk.y | 2434 +++++ contrib/awk/builtin.c | 2048 ++++ contrib/awk/config.h | 207 + contrib/awk/custom.h | 59 + contrib/awk/dfa.c | 2606 +++++ contrib/awk/dfa.h | 364 + contrib/awk/doc/ChangeLog | 91 + contrib/awk/doc/awk.1 | 2621 +++++ contrib/awk/doc/gawk.texi | 20820 ++++++++++++++++++++++++++++++++++++ contrib/awk/eval.c | 1720 +++ contrib/awk/field.c | 915 ++ contrib/awk/gawkmisc.c | 63 + contrib/awk/getopt.c | 1000 ++ contrib/awk/getopt.h | 133 + contrib/awk/getopt1.c | 189 + contrib/awk/io.c | 1941 ++++ contrib/awk/main.c | 735 ++ contrib/awk/missing.c | 59 + contrib/awk/msg.c | 189 + contrib/awk/node.c | 515 + contrib/awk/patchlevel.h | 1 + contrib/awk/posix/ChangeLog | 19 + contrib/awk/posix/gawkmisc.c | 108 + contrib/awk/random.c | 379 + contrib/awk/random.h | 29 + contrib/awk/re.c | 310 + contrib/awk/regex.h | 531 + contrib/awk/test/ChangeLog | 252 + contrib/awk/test/Makefile | 451 + contrib/awk/test/Makefile.in | 451 + contrib/awk/test/README | 18 + contrib/awk/test/anchgsub.awk | 1 + contrib/awk/test/anchgsub.in | 1 + contrib/awk/test/anchgsub.ok | 1 + contrib/awk/test/argarray.awk | 14 + contrib/awk/test/argarray.in | 1 + contrib/awk/test/argarray.ok | 9 + contrib/awk/test/argtest.awk | 4 + contrib/awk/test/argtest.ok | 4 + contrib/awk/test/arrayparm.awk | 21 + contrib/awk/test/arrayparm.ok | 1 + 
contrib/awk/test/arrayref.awk | 13 + contrib/awk/test/arrayref.ok | 2 + contrib/awk/test/asgext.awk | 1 + contrib/awk/test/asgext.in | 3 + contrib/awk/test/asgext.ok | 6 + contrib/awk/test/awkpath.ok | 1 + contrib/awk/test/back89.in | 2 + contrib/awk/test/back89.ok | 1 + contrib/awk/test/backgsub.awk | 4 + contrib/awk/test/backgsub.in | 1 + contrib/awk/test/backgsub.ok | 1 + contrib/awk/test/badargs.ok | 23 + contrib/awk/test/childin.ok | 1 + contrib/awk/test/clobber.awk | 98 + contrib/awk/test/clobber.ok | 1 + contrib/awk/test/clsflnam.awk | 12 + contrib/awk/test/clsflnam.in | 3 + contrib/awk/test/clsflnam.ok | 0 contrib/awk/test/compare.awk | 13 + contrib/awk/test/compare.in | 4 + contrib/awk/test/compare.ok | 5 + contrib/awk/test/convfmt.awk | 10 + contrib/awk/test/convfmt.ok | 3 + contrib/awk/test/defref.awk | 1 + contrib/awk/test/defref.ok | 2 + contrib/awk/test/delarprm.awk | 50 + contrib/awk/test/delarprm.ok | 0 contrib/awk/test/dynlj.awk | 1 + contrib/awk/test/dynlj.ok | 1 + contrib/awk/test/eofsplit.awk | 68 + contrib/awk/test/eofsplit.ok | 0 contrib/awk/test/fflush.ok | 16 + contrib/awk/test/fflush.sh | 16 + contrib/awk/test/fieldwdth.ok | 1 + contrib/awk/test/fldchg.awk | 8 + contrib/awk/test/fldchg.in | 1 + contrib/awk/test/fldchg.ok | 3 + contrib/awk/test/fldchgnf.awk | 1 + contrib/awk/test/fldchgnf.in | 1 + contrib/awk/test/fldchgnf.ok | 2 + contrib/awk/test/fnarray.awk | 7 + contrib/awk/test/fnarray.ok | 1 + contrib/awk/test/fnarydel.awk | 60 + contrib/awk/test/fnarydel.ok | 27 + contrib/awk/test/fsbs.in | 1 + contrib/awk/test/fsbs.ok | 1 + contrib/awk/test/fsrs.awk | 8 + contrib/awk/test/fsrs.in | 7 + contrib/awk/test/fsrs.ok | 5 + contrib/awk/test/fstabplus.awk | 2 + contrib/awk/test/fstabplus.ok | 1 + contrib/awk/test/funstack.awk | 977 ++ contrib/awk/test/funstack.in | 206 + contrib/awk/test/funstack.ok | 0 contrib/awk/test/gensub.awk | 7 + contrib/awk/test/gensub.in | 2 + contrib/awk/test/gensub.ok | 4 + contrib/awk/test/getline.awk | 1 + 
contrib/awk/test/getline.ok | 2 + contrib/awk/test/getlnhd.awk | 10 + contrib/awk/test/getlnhd.ok | 2 + contrib/awk/test/gnureops.awk | 45 + contrib/awk/test/gnureops.ok | 17 + contrib/awk/test/gsubasgn.awk | 13 + contrib/awk/test/gsubasgn.ok | 4 + contrib/awk/test/gsubtest.awk | 8 + contrib/awk/test/gsubtest.ok | 6 + contrib/awk/test/igncfs.awk | 8 + contrib/awk/test/igncfs.in | 2 + contrib/awk/test/igncfs.ok | 2 + contrib/awk/test/ignrcase.ok | 1 + contrib/awk/test/inftest.awk | 5 + contrib/awk/test/inftest.ok | 105 + contrib/awk/test/intest.awk | 4 + contrib/awk/test/intest.ok | 1 + contrib/awk/test/intprec.awk | 1 + contrib/awk/test/intprec.ok | 1 + contrib/awk/test/lib/awkpath.awk | 1 + contrib/awk/test/litoct.awk | 1 + contrib/awk/test/litoct.ok | 1 + contrib/awk/test/longwrds.awk | 20 + contrib/awk/test/longwrds.ok | 21 + contrib/awk/test/manpage | 200 + contrib/awk/test/manyfiles.awk | 1 + contrib/awk/test/math.awk | 10 + contrib/awk/test/math.ok | 6 + contrib/awk/test/messages.awk | 9 + contrib/awk/test/mmap8k.in | 143 + contrib/awk/test/negexp.ok | 1 + contrib/awk/test/nfldstr.ok | 0 contrib/awk/test/nfset.awk | 1 + contrib/awk/test/nfset.in | 5 + contrib/awk/test/nfset.ok | 5 + contrib/awk/test/nlfldsep.awk | 2 + contrib/awk/test/nlfldsep.in | 5 + contrib/awk/test/nlfldsep.ok | 13 + contrib/awk/test/noeffect.awk | 4 + contrib/awk/test/noeffect.ok | 2 + contrib/awk/test/nofmtch.awk | 1 + contrib/awk/test/nofmtch.ok | 2 + contrib/awk/test/nondec.awk | 1 + contrib/awk/test/nondec.ok | 1 + contrib/awk/test/nonl.awk | 1 + contrib/awk/test/nonl.ok | 1 + contrib/awk/test/noparms.awk | 1 + contrib/awk/test/noparms.ok | 4 + contrib/awk/test/nors.in | 1 + contrib/awk/test/nors.ok | 2 + contrib/awk/test/numsubstr.awk | 1 + contrib/awk/test/numsubstr.in | 3 + contrib/awk/test/numsubstr.ok | 3 + contrib/awk/test/out1.ok | 1 + contrib/awk/test/out2.ok | 2 + contrib/awk/test/out3.ok | 1 + contrib/awk/test/paramdup.awk | 8 + contrib/awk/test/paramdup.ok | 2 + 
contrib/awk/test/pcntplus.awk | 1 + contrib/awk/test/pcntplus.ok | 1 + contrib/awk/test/pid.awk | 44 + contrib/awk/test/pid.ok | 0 contrib/awk/test/pid.sh | 5 + contrib/awk/test/pipeio1.awk | 31 + contrib/awk/test/pipeio1.ok | 2 + contrib/awk/test/pipeio2.awk | 67 + contrib/awk/test/pipeio2.in | 8 + contrib/awk/test/pipeio2.ok | 16 + contrib/awk/test/posix.awk | 69 + contrib/awk/test/posix.ok | 16 + contrib/awk/test/poundbang | 3 + contrib/awk/test/poundbang.ok | 1 + contrib/awk/test/prdupval.awk | 1 + contrib/awk/test/prdupval.in | 1 + contrib/awk/test/prdupval.ok | 1 + contrib/awk/test/prmarscl.awk | 6 + contrib/awk/test/prmarscl.ok | 1 + contrib/awk/test/prmreuse.awk | 14 + contrib/awk/test/prmreuse.ok | 0 contrib/awk/test/prt1eval.awk | 6 + contrib/awk/test/prt1eval.ok | 1 + contrib/awk/test/prtoeval.awk | 4 + contrib/awk/test/prtoeval.ok | 2 + contrib/awk/test/rand.awk | 6 + contrib/awk/test/rand.ok | 1 + contrib/awk/test/reg/exp-eq.awk | 1 + contrib/awk/test/reg/exp-eq.good | 3 + contrib/awk/test/reg/exp-eq.in | 3 + contrib/awk/test/reg/exp.awk | 1 + contrib/awk/test/reg/exp.good | 2 + contrib/awk/test/reg/exp.in | 0 contrib/awk/test/reg/func.awk | 1 + contrib/awk/test/reg/func.good | 1 + contrib/awk/test/reg/func.in | 0 contrib/awk/test/reg/func2.awk | 2 + contrib/awk/test/reg/func2.good | 2 + contrib/awk/test/reg/func2.in | 0 contrib/awk/test/reg/log.awk | 1 + contrib/awk/test/reg/log.good | 4 + contrib/awk/test/reg/log.in | 0 contrib/awk/test/regtest | 18 + contrib/awk/test/reindops.awk | 6 + contrib/awk/test/reindops.in | 1 + contrib/awk/test/reindops.ok | 1 + contrib/awk/test/reint.awk | 1 + contrib/awk/test/reint.in | 1 + contrib/awk/test/reint.ok | 1 + contrib/awk/test/reparse.awk | 7 + contrib/awk/test/reparse.in | 1 + contrib/awk/test/reparse.ok | 3 + contrib/awk/test/resplit.ok | 1 + contrib/awk/test/rs.in | 15 + contrib/awk/test/rs.ok | 3 + contrib/awk/test/rswhite.awk | 2 + contrib/awk/test/rswhite.in | 2 + contrib/awk/test/rswhite.ok | 2 + 
contrib/awk/test/sclforin.awk | 1 + contrib/awk/test/sclforin.ok | 1 + contrib/awk/test/sclifin.awk | 7 + contrib/awk/test/sclifin.ok | 1 + contrib/awk/test/splitargv.awk | 7 + contrib/awk/test/splitargv.in | 7 + contrib/awk/test/splitargv.ok | 7 + contrib/awk/test/splitvar.awk | 5 + contrib/awk/test/splitvar.in | 1 + contrib/awk/test/splitvar.ok | 1 + contrib/awk/test/splitwht.awk | 7 + contrib/awk/test/splitwht.ok | 2 + contrib/awk/test/sprintfc.awk | 1 + contrib/awk/test/sprintfc.in | 3 + contrib/awk/test/sprintfc.ok | 3 + contrib/awk/test/strftlng.awk | 11 + contrib/awk/test/strftlng.ok | 58 + contrib/awk/test/substr.awk | 14 + contrib/awk/test/substr.ok | 10 + contrib/awk/test/swaplns.awk | 7 + contrib/awk/test/swaplns.in | 9 + contrib/awk/test/swaplns.ok | 9 + contrib/awk/test/tradanch.awk | 2 + contrib/awk/test/tradanch.in | 2 + contrib/awk/test/tradanch.ok | 0 contrib/awk/test/tweakfld.awk | 296 + contrib/awk/test/tweakfld.in | 3 + contrib/awk/test/tweakfld.ok | 3 + contrib/awk/version.c | 50 + 260 files changed, 49739 insertions(+) create mode 100644 contrib/awk/ACKNOWLEDGMENT create mode 100644 contrib/awk/COPYING create mode 100644 contrib/awk/ChangeLog create mode 100644 contrib/awk/FREEBSD-upgrade create mode 100644 contrib/awk/FUTURES create mode 100644 contrib/awk/INSTALL create mode 100644 contrib/awk/LIMITATIONS create mode 100644 contrib/awk/NEWS create mode 100644 contrib/awk/PORTS create mode 100644 contrib/awk/POSIX.STD create mode 100644 contrib/awk/PROBLEMS create mode 100644 contrib/awk/README create mode 100644 contrib/awk/README_d/README.FIRST create mode 100644 contrib/awk/acconfig.h create mode 100644 contrib/awk/array.c create mode 100644 contrib/awk/awk.h create mode 100644 contrib/awk/awk.y create mode 100644 contrib/awk/builtin.c create mode 100644 contrib/awk/config.h create mode 100644 contrib/awk/custom.h create mode 100644 contrib/awk/dfa.c create mode 100644 contrib/awk/dfa.h create mode 100644 contrib/awk/doc/ChangeLog create 
mode 100644 contrib/awk/doc/awk.1 create mode 100644 contrib/awk/doc/gawk.texi create mode 100644 contrib/awk/eval.c create mode 100644 contrib/awk/field.c create mode 100644 contrib/awk/gawkmisc.c create mode 100644 contrib/awk/getopt.c create mode 100644 contrib/awk/getopt.h create mode 100644 contrib/awk/getopt1.c create mode 100644 contrib/awk/io.c create mode 100644 contrib/awk/main.c create mode 100644 contrib/awk/missing.c create mode 100644 contrib/awk/msg.c create mode 100644 contrib/awk/node.c create mode 100644 contrib/awk/patchlevel.h create mode 100644 contrib/awk/posix/ChangeLog create mode 100644 contrib/awk/posix/gawkmisc.c create mode 100644 contrib/awk/random.c create mode 100644 contrib/awk/random.h create mode 100644 contrib/awk/re.c create mode 100644 contrib/awk/regex.h create mode 100644 contrib/awk/test/ChangeLog create mode 100644 contrib/awk/test/Makefile create mode 100644 contrib/awk/test/Makefile.in create mode 100644 contrib/awk/test/README create mode 100644 contrib/awk/test/anchgsub.awk create mode 100644 contrib/awk/test/anchgsub.in create mode 100644 contrib/awk/test/anchgsub.ok create mode 100644 contrib/awk/test/argarray.awk create mode 100644 contrib/awk/test/argarray.in create mode 100644 contrib/awk/test/argarray.ok create mode 100644 contrib/awk/test/argtest.awk create mode 100644 contrib/awk/test/argtest.ok create mode 100644 contrib/awk/test/arrayparm.awk create mode 100644 contrib/awk/test/arrayparm.ok create mode 100644 contrib/awk/test/arrayref.awk create mode 100644 contrib/awk/test/arrayref.ok create mode 100644 contrib/awk/test/asgext.awk create mode 100644 contrib/awk/test/asgext.in create mode 100644 contrib/awk/test/asgext.ok create mode 100644 contrib/awk/test/awkpath.ok create mode 100644 contrib/awk/test/back89.in create mode 100644 contrib/awk/test/back89.ok create mode 100644 contrib/awk/test/backgsub.awk create mode 100644 contrib/awk/test/backgsub.in create mode 100644 contrib/awk/test/backgsub.ok create 
mode 100644 contrib/awk/test/badargs.ok create mode 100644 contrib/awk/test/childin.ok create mode 100644 contrib/awk/test/clobber.awk create mode 100644 contrib/awk/test/clobber.ok create mode 100644 contrib/awk/test/clsflnam.awk create mode 100644 contrib/awk/test/clsflnam.in create mode 100644 contrib/awk/test/clsflnam.ok create mode 100644 contrib/awk/test/compare.awk create mode 100644 contrib/awk/test/compare.in create mode 100644 contrib/awk/test/compare.ok create mode 100644 contrib/awk/test/convfmt.awk create mode 100644 contrib/awk/test/convfmt.ok create mode 100644 contrib/awk/test/defref.awk create mode 100644 contrib/awk/test/defref.ok create mode 100644 contrib/awk/test/delarprm.awk create mode 100644 contrib/awk/test/delarprm.ok create mode 100644 contrib/awk/test/dynlj.awk create mode 100644 contrib/awk/test/dynlj.ok create mode 100644 contrib/awk/test/eofsplit.awk create mode 100644 contrib/awk/test/eofsplit.ok create mode 100644 contrib/awk/test/fflush.ok create mode 100755 contrib/awk/test/fflush.sh create mode 100644 contrib/awk/test/fieldwdth.ok create mode 100644 contrib/awk/test/fldchg.awk create mode 100644 contrib/awk/test/fldchg.in create mode 100644 contrib/awk/test/fldchg.ok create mode 100644 contrib/awk/test/fldchgnf.awk create mode 100644 contrib/awk/test/fldchgnf.in create mode 100644 contrib/awk/test/fldchgnf.ok create mode 100644 contrib/awk/test/fnarray.awk create mode 100644 contrib/awk/test/fnarray.ok create mode 100644 contrib/awk/test/fnarydel.awk create mode 100644 contrib/awk/test/fnarydel.ok create mode 100644 contrib/awk/test/fsbs.in create mode 100644 contrib/awk/test/fsbs.ok create mode 100644 contrib/awk/test/fsrs.awk create mode 100644 contrib/awk/test/fsrs.in create mode 100644 contrib/awk/test/fsrs.ok create mode 100644 contrib/awk/test/fstabplus.awk create mode 100644 contrib/awk/test/fstabplus.ok create mode 100644 contrib/awk/test/funstack.awk create mode 100644 contrib/awk/test/funstack.in create mode 100644 
contrib/awk/test/funstack.ok create mode 100644 contrib/awk/test/gensub.awk create mode 100644 contrib/awk/test/gensub.in create mode 100644 contrib/awk/test/gensub.ok create mode 100644 contrib/awk/test/getline.awk create mode 100644 contrib/awk/test/getline.ok create mode 100644 contrib/awk/test/getlnhd.awk create mode 100644 contrib/awk/test/getlnhd.ok create mode 100644 contrib/awk/test/gnureops.awk create mode 100644 contrib/awk/test/gnureops.ok create mode 100644 contrib/awk/test/gsubasgn.awk create mode 100644 contrib/awk/test/gsubasgn.ok create mode 100755 contrib/awk/test/gsubtest.awk create mode 100644 contrib/awk/test/gsubtest.ok create mode 100644 contrib/awk/test/igncfs.awk create mode 100644 contrib/awk/test/igncfs.in create mode 100644 contrib/awk/test/igncfs.ok create mode 100644 contrib/awk/test/ignrcase.ok create mode 100644 contrib/awk/test/inftest.awk create mode 100644 contrib/awk/test/inftest.ok create mode 100644 contrib/awk/test/intest.awk create mode 100644 contrib/awk/test/intest.ok create mode 100644 contrib/awk/test/intprec.awk create mode 100644 contrib/awk/test/intprec.ok create mode 100644 contrib/awk/test/lib/awkpath.awk create mode 100644 contrib/awk/test/litoct.awk create mode 100644 contrib/awk/test/litoct.ok create mode 100644 contrib/awk/test/longwrds.awk create mode 100644 contrib/awk/test/longwrds.ok create mode 100644 contrib/awk/test/manpage create mode 100644 contrib/awk/test/manyfiles.awk create mode 100644 contrib/awk/test/math.awk create mode 100644 contrib/awk/test/math.ok create mode 100644 contrib/awk/test/messages.awk create mode 100644 contrib/awk/test/mmap8k.in create mode 100644 contrib/awk/test/negexp.ok create mode 100644 contrib/awk/test/nfldstr.ok create mode 100644 contrib/awk/test/nfset.awk create mode 100644 contrib/awk/test/nfset.in create mode 100644 contrib/awk/test/nfset.ok create mode 100644 contrib/awk/test/nlfldsep.awk create mode 100644 contrib/awk/test/nlfldsep.in create mode 100644 
contrib/awk/test/nlfldsep.ok create mode 100644 contrib/awk/test/noeffect.awk create mode 100644 contrib/awk/test/noeffect.ok create mode 100644 contrib/awk/test/nofmtch.awk create mode 100644 contrib/awk/test/nofmtch.ok create mode 100644 contrib/awk/test/nondec.awk create mode 100644 contrib/awk/test/nondec.ok create mode 100644 contrib/awk/test/nonl.awk create mode 100644 contrib/awk/test/nonl.ok create mode 100644 contrib/awk/test/noparms.awk create mode 100644 contrib/awk/test/noparms.ok create mode 100644 contrib/awk/test/nors.in create mode 100644 contrib/awk/test/nors.ok create mode 100644 contrib/awk/test/numsubstr.awk create mode 100644 contrib/awk/test/numsubstr.in create mode 100644 contrib/awk/test/numsubstr.ok create mode 100644 contrib/awk/test/out1.ok create mode 100644 contrib/awk/test/out2.ok create mode 100644 contrib/awk/test/out3.ok create mode 100644 contrib/awk/test/paramdup.awk create mode 100644 contrib/awk/test/paramdup.ok create mode 100644 contrib/awk/test/pcntplus.awk create mode 100644 contrib/awk/test/pcntplus.ok create mode 100644 contrib/awk/test/pid.awk create mode 100644 contrib/awk/test/pid.ok create mode 100755 contrib/awk/test/pid.sh create mode 100644 contrib/awk/test/pipeio1.awk create mode 100644 contrib/awk/test/pipeio1.ok create mode 100644 contrib/awk/test/pipeio2.awk create mode 100644 contrib/awk/test/pipeio2.in create mode 100644 contrib/awk/test/pipeio2.ok create mode 100644 contrib/awk/test/posix.awk create mode 100644 contrib/awk/test/posix.ok create mode 100755 contrib/awk/test/poundbang create mode 100644 contrib/awk/test/poundbang.ok create mode 100644 contrib/awk/test/prdupval.awk create mode 100644 contrib/awk/test/prdupval.in create mode 100644 contrib/awk/test/prdupval.ok create mode 100644 contrib/awk/test/prmarscl.awk create mode 100644 contrib/awk/test/prmarscl.ok create mode 100644 contrib/awk/test/prmreuse.awk create mode 100644 contrib/awk/test/prmreuse.ok create mode 100644 
contrib/awk/test/prt1eval.awk create mode 100644 contrib/awk/test/prt1eval.ok create mode 100644 contrib/awk/test/prtoeval.awk create mode 100644 contrib/awk/test/prtoeval.ok create mode 100644 contrib/awk/test/rand.awk create mode 100644 contrib/awk/test/rand.ok create mode 100644 contrib/awk/test/reg/exp-eq.awk create mode 100644 contrib/awk/test/reg/exp-eq.good create mode 100644 contrib/awk/test/reg/exp-eq.in create mode 100644 contrib/awk/test/reg/exp.awk create mode 100644 contrib/awk/test/reg/exp.good create mode 100644 contrib/awk/test/reg/exp.in create mode 100644 contrib/awk/test/reg/func.awk create mode 100644 contrib/awk/test/reg/func.good create mode 100644 contrib/awk/test/reg/func.in create mode 100644 contrib/awk/test/reg/func2.awk create mode 100644 contrib/awk/test/reg/func2.good create mode 100644 contrib/awk/test/reg/func2.in create mode 100644 contrib/awk/test/reg/log.awk create mode 100644 contrib/awk/test/reg/log.good create mode 100644 contrib/awk/test/reg/log.in create mode 100755 contrib/awk/test/regtest create mode 100644 contrib/awk/test/reindops.awk create mode 100644 contrib/awk/test/reindops.in create mode 100644 contrib/awk/test/reindops.ok create mode 100644 contrib/awk/test/reint.awk create mode 100644 contrib/awk/test/reint.in create mode 100644 contrib/awk/test/reint.ok create mode 100644 contrib/awk/test/reparse.awk create mode 100644 contrib/awk/test/reparse.in create mode 100644 contrib/awk/test/reparse.ok create mode 100644 contrib/awk/test/resplit.ok create mode 100644 contrib/awk/test/rs.in create mode 100644 contrib/awk/test/rs.ok create mode 100644 contrib/awk/test/rswhite.awk create mode 100644 contrib/awk/test/rswhite.in create mode 100644 contrib/awk/test/rswhite.ok create mode 100644 contrib/awk/test/sclforin.awk create mode 100644 contrib/awk/test/sclforin.ok create mode 100644 contrib/awk/test/sclifin.awk create mode 100644 contrib/awk/test/sclifin.ok create mode 100644 contrib/awk/test/splitargv.awk create mode 
100644 contrib/awk/test/splitargv.in create mode 100644 contrib/awk/test/splitargv.ok create mode 100644 contrib/awk/test/splitvar.awk create mode 100644 contrib/awk/test/splitvar.in create mode 100644 contrib/awk/test/splitvar.ok create mode 100644 contrib/awk/test/splitwht.awk create mode 100644 contrib/awk/test/splitwht.ok create mode 100644 contrib/awk/test/sprintfc.awk create mode 100644 contrib/awk/test/sprintfc.in create mode 100644 contrib/awk/test/sprintfc.ok create mode 100644 contrib/awk/test/strftlng.awk create mode 100644 contrib/awk/test/strftlng.ok create mode 100644 contrib/awk/test/substr.awk create mode 100644 contrib/awk/test/substr.ok create mode 100644 contrib/awk/test/swaplns.awk create mode 100644 contrib/awk/test/swaplns.in create mode 100644 contrib/awk/test/swaplns.ok create mode 100644 contrib/awk/test/tradanch.awk create mode 100644 contrib/awk/test/tradanch.in create mode 100644 contrib/awk/test/tradanch.ok create mode 100644 contrib/awk/test/tweakfld.awk create mode 100644 contrib/awk/test/tweakfld.in create mode 100644 contrib/awk/test/tweakfld.ok create mode 100644 contrib/awk/version.c (limited to 'contrib') diff --git a/contrib/awk/ACKNOWLEDGMENT b/contrib/awk/ACKNOWLEDGMENT new file mode 100644 index 0000000..0851ecf --- /dev/null +++ b/contrib/awk/ACKNOWLEDGMENT @@ -0,0 +1,34 @@ +The current developers of Gawk would like to thank and acknowledge the +many people who have contributed to the development through bug reports +and fixes and suggestions. Unfortunately, we have not been organized +enough to keep track of all the names -- for that we apologize. + +The following people were involved in porting gawk to different platforms. + + Mike Lijewski (IBM RS6000) + Kent Williams (MSDOS 2.11) + Conrad Kwok (MSDOS earlier versions) + Scott Garfinkle (MSDOS earlier versions) + Hal Peterson (Cray) + +This group of people comprise the "GAWK crack portability team", who +test the pre-releases and ensure portability of gawk. 
+ + Pat Rankin (VMS) + Michal Jaegermann + (Atari, NeXT, DEC 3100) + Scott Deifik (MSDOS 2.14, 2.15, 3.0) + Kai Uwe Rommel (OS/2) + Darrel Hankerson (DOS and formerly OS/2) + Mark Moraes (Code Center, Purify) + Kaveh Ghazi (Lots of Unix variants) + +Michal, Scott and Darrel go out of their way to make sure that gawk +works on non-32 bit systems, and keep me on track where portability is +concerned. Indeed, all of these folks are incredibly helpful; gawk would +not be the fine program it is now without them. + +Last, but far from least, we would like to thank Brian Kernighan who +has helped to clear up many dark corners of the language and provided a +restraining touch when we have been overly tempted by "feeping +creaturism". diff --git a/contrib/awk/COPYING b/contrib/awk/COPYING new file mode 100644 index 0000000..60549be --- /dev/null +++ b/contrib/awk/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/contrib/awk/ChangeLog b/contrib/awk/ChangeLog new file mode 100644 index 0000000..0fa6515 --- /dev/null +++ b/contrib/awk/ChangeLog @@ -0,0 +1,826 @@ +Thu May 15 12:49:08 1997 Arnold D. Robbins + + * Release 3.0.3: Release tar file made. + +Wed May 14 08:06:08 1997 Arnold D. Robbins + + * io.c (do_close): add lint warning if closing something that + isn't open. + +Tue May 13 12:14:12 1997 Arnold D. Robbins + + * random.c, builtin.c: remove __GLIBC__ tests, since it breaks + `make test'. I prefer consistency across platforms. + * Makefile.in (gawk): undid April 25 changes and added comment. + Putting COMPLAGS in breaks with -g on VMS POSIX. + +Sun May 11 14:48:04 1997 Darrell Hankerson + + * io.c [MSC_VER]: add cases for WIN32. + * regex.c [MSC_VER]: add cases for WIN32. + +Sun May 11 07:04:01 1997 Arnold D. Robbins + + * builtin.c (do_print): in the loop that evaluates each expression + to be printed, do a dupnode to avoid bizarre output. Thanks to + Michal for finding this problem. + * awk.y (yylex): fix scanning of hexadecimal constants. + +Wed May 7 15:09:25 1997 Arnold D. Robbins + + * io.c (get_a_record): fix casetable indexing with cast to int. + Keeps Michal happy. + +Tue May 6 16:40:19 1997 Arnold D. Robbins + + * eval.c (func_call): removed unneeded variables. + +Mon May 5 21:17:37 1997 Pat Rankin + + * missing/strftime.c [case 'v', VMS_EXT]: for VMS date format, two + digit day of month should not be zero padded on the 1st through + the 9th. + +Mon May 5 06:33:47 1997 Arnold D. Robbins + + * regex.h, regex.c: merge with current GLIBC version. + +Mon May 5 06:33:47 1997 Pat Rankin + + * io.c (nextfile): move the check for null return from iop_open + in the normal case and add one for the "no args" case. + +Fri Apr 25 16:52:33 1997 Arnold D. Robbins + + * array.c (grow_table): add a bunch more large primes so arrays + can get really big. Thanks to christos@deshaw.com. + * all files: remove ifdef'ed out code and update copyrights. 
+ * Makefile.in (gawk): add $(COMPFLAGS) to command line. + * eval.c (flags2str): added case for FIELD. + +Thu Apr 24 22:39:23 1997 Arnold D. Robbins + + * COPYING: changed to current official version from FSF. + * regex.c: merge with GLIBC version. + * awk.h [_GNU_SOURCE]: bracket definition inside ifdef. + (NODE.source_line): move name member out of `x' union and + into `nodep'; avoids problems doing diagnostics. + (nondec2num): put decl into #if BITOPS || NONDECDATA + * posix/gawkmisc.c, missing/system.c, missing/strtod.c, + missing/strerror.c: move to generic GPL statement at top. + * builtin.c (nondec2num): put into #if BITOPS || NONDECDATA + +Wed Apr 23 22:14:14 1997 Arnold D. Robbins + + * dfa.c: misc changes for really pedantic SGI compilers. + * builtin.c: bracket defs of random() etc for GLIBC. + * random.c: bracket whole file for GLIBC. + * configure.in: extra goop for GETPGRP test for VMS POSIX. + * custom.h [VMS]: remove hard definition of GETPGRP_VOID. + +Fri Apr 18 07:55:47 1997 Arnold D. Robbins + + * BETA Release 3.0.34: Release tar file made. + +Tue Apr 15 21:35:45 1997 Arnold D. Robbins + + NEW UNDOCUMENTED FEATURE. USE THE SOURCE LUKE! + * acconfig.h [NONDECDATA]: new macro. + * awk.h: add decl of do_strtonum. + * awk.y (tokentab): add entry for strtonum function. + * builtin.c (do_strtonum): new function. + * configure.in (non-decimal-data): new --enable-* option. + * node.c (r_force_number): change to allow non-decimal data inside + ifdef NONDECDATA. + +Tue Apr 15 06:32:50 1997 Pat Rankin + + * missing/strftime.c (malloc, realloc, getenv, strchr): only + declare these when STDC_HEADERS is not defined. + : include these when STDC_HEADERS is defined. + * awk.h (freenode, tree_eval, m_tree_eval): reorganize definitions. + * alloca.c (malloc): if malloc is already defined as a macro, + presumably by config.h, don't define or declare it. + +Wed Apr 9 22:45:27 1997 Arnold D. 
Robbins + + * Makefile.in [COMPFLAGS]: per suggestion from Karl Berry, put + $(CFLAGS) last. + +Tue Apr 8 23:54:46 1997 Arnold D. Robbins + + * eval.c (interpret): For Node_K_break and Node_K_continue, if + treating them like `next', also check the function call stack + and pop it if necessary. + +Mon Apr 7 18:22:37 1997 Arnold D. Robbins + + * awk.h: Add decls of new routines do_compl() and set_loc(). + * awk.y (tokentab): add entry for "compl" function. + * builtin.c (do_compl): new function to do ones complement. + (do_substr): rationalized yet again, now notices negative start + and length parameters. + * eval.c (push_args): fix if call_list gets realloc'ed in the + middle of things. Avoids crash for deeply nested function calls. + * main.c (catch_sig): add call to set_loc(). + * msg.c (set_loc, srcfile, srcline): new function and private + variables to help out in tracing down source of error messages. + +Fri Mar 28 08:42:27 1997 Arnold D. Robbins + + * io.c (iop_alloc, iop_close): Undo changes of Feb 11, apparently + other cleanups in io.c made mmap stuff start working again. + BAH! It's a mess, the test suite still fails. I'm leaving the + mmap stuff undefined for now. It'll probably get ripped out in 3.1. + +Thu Mar 27 08:48:57 1997 Arnold D. Robbins + + * custom.h [_SEQUENT_]: undef HAVE_MMAP. + +Wed Mar 26 09:08:16 1997 Arnold D. Robbins + + * io.c (iop_alloc): fix definition to make it static. + +Mon Mar 24 23:09:07 1997 Arnold D. Robbins + + * field.c (init_fields, etc..): more clean up use of Null_field + and the various flags. + * node.c (unref): if a field, free the node itself. Fixes + memory leak problems. + +Sun Mar 23 22:51:09 1997 Arnold D. Robbins + + * awk.h [FIELD]: new flag for node->flags field. + * builtin.c (sub_common): if FIELD is set, dup the string. + * field.c (init_fields): set up a new Null_field global var. + (init_fields, set_field, set_record) use the FIELD flag. + (getfield): use Null_field instead of private variable. 
+ * io.c (wait_any): comment out calls to pclose and iop_close, + caused weird race conditions. See test/pipeio1.awk. Thanks + to Darrell Hankerson for tracing this one down. + +Tue Mar 18 20:57:18 1997 Arnold D. Robbins + + * dfa.c (inboth): free templist; plugs memory leak. + * field.c (init_fields, grow_fields_arr, set_field, rebuild_record, + set_record): remove PERM flag from entries in fields_arr[]. Fixes + nasty memory leak. + +Tue Mar 18 06:33:00 1997 Arnold D. Robbins + + * awk.y (dup_parms): robustified against parameter errors. + +Sun Mar 16 21:31:40 1997 Arnold D. Robbins + + NEW UNDOCUMENTED FEATURE. USE THE SOURCE LUKE! + * acconfig.h [BITOPS]: new macro. If set, do octal & hex and bit ops. + * awk.h [isnondecimal]: new macro, and decl of new functions. + * awk.y (yylex): add recognition of octal and hex constants. + * builtin.c (do_and, do_or, do_xor, do_lshift, do_rshift): new + functions that do bit operations. + (nondec2awknum): new function to convert octal or hex to double. + * configure.in: Add AC_ARG_ENABLE for bit operations. + * node.c (r_force_number): add octal and hex conversion. + +Sun Mar 16 21:28:56 1997 Arnold D. Robbins + + * awk.h [IOP_NOFREE_OBJ]: new macro. + * io.c (iop_open, iop_alloc): add new third parameter, which is + either NULL, meaning allocate a new IOP, or the address of one + already allocated. Have a static one in the `nextfile' + routine, and use the IOP_NOFREE_OBJ flag for it. All of this + keeps us from reading freed memory. The `swaplns' test fails + otherwise. + (iop_close): if IOP_NOFREE_OBJ is set, don't free the IOBUF. + +Wed Feb 26 06:21:02 1997 Arnold D. Robbins + + * eval.c (in_function, pop_fcall_stack, pop_fcall, push_args): + new functions. These manage "frames" of awk function call arguments. + The problem is that a `next' or a `nextfile' from a function + leaks memory. These changes allow us to free up that memory. 
+ (interpret): for Node_K_next and Node_K_nextfile, check if in + a function call and free all function call frames. + +Fri Feb 21 06:23:19 1997 Arnold D. Robbins + + * Misc changes from Katsuyuki Okabe : + * builtin.c (do_substr): change a %d to %ld in warning message. + * eval.c (op_assign): fix format string for warning about %=. + +Wed Feb 19 23:29:02 1997 Arnold D. Robbins + + * main.c (main): add do_intervals to condition that causes + resetup() to be called again. Makes the --re-interval option + actually work. What a concept. + +Fri Feb 14 09:47:31 1997 Arnold D. Robbins + + * io.c [#include "awk.h"]: undef HAVE_MMAP to just use the old code. + Something is causing a file descriptor leak, and this is getting to + be just too much hair. I reserve the right to rip out the mmap + code entirely at a future date. + +Tue Feb 11 06:28:29 1997 Arnold D. Robbins + + * io.c (iop_alloc): for an mmap'ed file, close the file descriptor, + and then touch each page to get a private copy. Fixes nasty case + of truncating our input file. + (iop_close): don't call close on mmap'ed file. + +Wed Feb 5 17:59:04 1997 Arnold D. Robbins + + * eval.c (interpret): For Node_K_delete, just call do_delete; let + it handle the case of `delete array'. + * array.c (do_delete): Changed to handle case of `delete array', + and made smarter if the array is actually an uninitialized + parameter. + +Sun Jan 26 22:58:29 1997 Arnold D. Robbins + + * getopt.h, getopt.c, getopt1.c: replaced with new versions from + GLIBC 2. + +Sun Jan 19 23:37:03 1997 Arnold D. Robbins + + * eval.c (nodetype2str): not static, for debugging. + (flags2str) new function: for debugging. + * field.c (get_field): add new var that is like Nnull_string but + does not have numeric attributes, so that new fields are strings. + (set_record): turn off PERM flag before unrefing fields and field 0. + * array.c (in_array): always evaluate subscript, could have + side effects. 
+ * builtin.c (do_strftime): way increase size of buffer to make sure + we don't have overflow problem. Keeps Paul Eggert happy. + * custom.h [__amigaos__]: define fork to vfork. From Fred Fish. + * dfa.c: move include of config.h to top, for RSXNT. From Kai + Uwe Rommel. + (ISALPHA, etc): change from Jacob Engelbrecht (jaen@novo.dk) + to better handle non-ascii environments. + * gawkmisc.c: remove amigados case, posix should now work fine. + * amiga/*: nuked per previous entry. + * Makefile.in: removed all references to amiga + * io.c [HAVE_SYS_PARAM_H]: Add #undef RE_DUP_MAX to avoid + spurious conflict with regex.h. + (flush_io): remove amiga ifdefs, not needed anymore. + (spec_setup): set getrec field for special files. Fix from + Mark Gray (markgray@pdt.net). + * node.c (more_nodes): fix to get the last entry in the array. + +Wed Jan 8 17:42:37 1997 Andreas Schwab + + * io.c (mmap_get_record): Fix return value if file ends without + record separator. + +Fri Jan 3 19:57:16 1997 Pat Rankin + + * awk.y (get_src_buf): Test for an empty source file by detecting + an initial read of 0 bytes rather than by relying on info from + stat(). + +Wed Dec 25 11:25:22 1996 Arnold D. Robbins + + * Release 3.0.2: Release tar file made. + +Wed Dec 25 11:17:32 1996 Arnold D. Robbins + + * Makefile.in (install, uninstall): use $(srcdir)/patchlevel.h. + Thanks to Richard Levitte, LeViMS@stacken.kth.se. + (install): remove chmod command; let $(INSTALL_PROGRAM) use -m. + +Mon Dec 23 20:36:59 1996 Pat Rankin + + * custom.h (#if VMS_POSIX): Define GETPGRP_VOID. + +Fri Dec 20 08:59:55 1996 Arnold D. Robbins + + * getopt.c, getopt1.c: comment out the `#if defined (_LIBC) || + !defined (__GNU_LIBRARY__)' and `#endif' to force use of this + getopt, even on systems like linux. This will be handled + better in 3.1 / glibc 2. + +Thu Dec 19 22:52:39 1996 Arnold D. Robbins + + * awk.y (yylex): In several places, after yyerror(), add call to + exit(). Otherwise, infinite messages. 
This should probably + be handled better. + +Wed Dec 18 22:42:10 1996 Darrel Hankerson + + * getopt.c (_getopt_internal): if 'W' and ';', if optind == argc, + return c, don't fall through. + +Wed Dec 18 10:09:44 1996 Arnold D. Robbins + + * configure.in [AC_PREREQ]: Update to 2.12 in order to switch to + autoconf 2.12. Lots of other files will be rebuilt automatically. + [AM_SANITY_CHECK_CC]: Removed, autoconf does it now. + * alocal.m4 [AM_SANITY_CHECK_CC]: Removed, autoconf does it now. + +Tue Dec 17 22:23:16 1996 Arnold D. Robbins + + * builtin.c (do_strftime): fix case if format string is "". + Also fix it if format is not "" but result of strftime is "". + See comments in code. + +Tue Dec 10 23:09:26 1996 Arnold D. Robbins + + * Release 3.0.1: Release tar file made. + +Tue Dec 10 22:39:41 1996 Arnold D. Robbins + + * Makefile.in (dist): add dependency on `info'. Remove line that + does makeinfo. + (install): use $(LN) not $(LN_S) to link gawk gawk-version. + +Sun Dec 8 07:53:44 1996 Arnold D. Robbins + + * Makefile.in (gawk): took COMPFLAGS out of link line for help + on VMS posix. Shouldn't (I hope) affect anything else. + +Thu Nov 28 11:52:24 1996 Arnold D. Robbins + + * configure.in (AC_PROG_INSTALL): Set INSTALL to install-sh. + +Tue Nov 26 22:42:00 1996 Arnold D. Robbins + + * PORTS: Updated list of systems. + * Makefile.in (install): Fix some typos and add some improvements + for Ultrix. + +Sun Nov 24 22:16:26 1996 Arnold D. Robbins + + * builtin.c (do_printf): if no args, fatal error. Return silently + if --traditional. + +Thu Nov 7 20:54:43 1996 Arnold D. Robbins + + * io.c (inrec): make sure EOF hasn't already happened before + trying to read; prevents accessing freed buffer. Thanks to + Michal Jaegermann. + * Makefile.in [AWKSRC]: add random.h. + random.h: new file, redefines names of the `random' functions. + random.c, builtin.c: add include of random.h. + +Thu Nov 7 09:06:21 1996 Arnold D. 
Robbins + + * awk.y (snode): undo 4 Oct change, put do_split code back. + field.c (do_split): restore old code; add test for CONST, so + that re_parse_field is used if third arg to split is a regexp + constant. + +Mon Nov 4 12:57:11 1996 Arnold D. Robbins + + * main.c (main): Research -m[fr] options don't need literal '=' + characters. Brian's documentation was confusing. Fixed, not + that anyone actually uses these options with gawk. + +Sun Nov 3 11:23:21 1996 Arnold D. Robbins + + * field.c (def_parse_field): add \n to list of acceptable white space. + (posix_def_parse_field): new routine, just like def_parse_field(), + but only allows space and tab as separators. + (do_split, set_FS): make appropriate choice between the two + *def_parse_field() routines. + +Fri Oct 25 10:13:06 1996 Arnold D. Robbins + + * configure.in: remove test for random. + * Makefile.in: add random.c to list of files always compiled. + * missing.c: remove HAVE_RANDOM test. + * builtin.c: remove ifdef's for HAVE_RANDOM. + [GAWK_RAND_MAX]: use constant we know works with our random(). + * random.c: new file - moved from missing/ directory. + +Wed Oct 23 19:46:01 1996 Pat Rankin + + * builtin.c (do_tolower, do_toupper): Add `unsigned char *' casts. + +Tue Oct 22 21:27:52 1996 Arnold D. Robbins + + * builtin.c [GAWK_RANDOM_MAX]: Try to make definition a bit + smarter; don't use RAND_MAX if it's equal to SHRT_MAX, blows + things up. + +Tue Oct 22 08:49:20 1996 Arnold D. Robbins + + * main.c (copyleft): update copyright date to 1996. + too many files to list: update copyright date to 1996. + +Sun Oct 20 12:21:09 1996 Arnold D. Robbins + + * awk.y, dfa.c, eval.c, io.c, re.c: added various FIXME comments. + +Sat Oct 19 22:06:42 1996 Arnold D. Robbins + + * eval.c (nodetype2str): make static, add prototype. + * field.c (sc_parse_field): cast array subscripts to int to + shut up gcc warnings. + * gawkmisc.c: add prototype for xmalloc. + * awk.h: add prototype for getredirect. 
+ * builtin.c (do_fflush): remove extern decl of getredirect. + * io.c (get_a_record, mmap_get_record): change decl of rs to int, + to shut up gcc warnings. + * awk.y (isassignable): add a default to switch to quiet gcc. + * getopt.c (_getopt_internal): give default value to `indfound'. + +Fri Oct 18 09:00:49 1996 Arnold D. Robbins + + * regex.h [RE_SYNTAX_AWK]: add RE_CONTEXT_INDEP_ANCHORS. + +Thu Oct 17 22:32:55 1996 Arnold D. Robbins + + * aclocal.m4 [AM_SANITY_CHECK_CC]: added. + * configure.in: use it. + +Thu Oct 17 21:43:25 1996 Arnold D. Robbins + + * configure.in: add checks for locale.h and setlocale(). + awk.h: include locale.h and define out setlocale() if not available. + main.c (main): call setlocale(). + builtin.c (do_tolower, do_toupper): use unsigned char pointers, + to get other charsets right in different locales. + +Wed Oct 16 21:32:53 1996 Arnold D. Robbins + + * builtin.c (format_tree): Change initial buffer size to 512 + and use a constant. Allows large values of %f per bug report + from sheyn@cs.bu.edu. + +Wed Oct 16 21:22:08 1996 Arnold D. Robbins + + * Makefile.in [MISC]: removed TAGS and tags + (local-distclean): added TAGS and tags + (maintainer-clean): removed TAGS and tags + +Wed Oct 16 12:28:43 1996 Arnold D. Robbins + + * main.c (version): Add call to copyleft(), per new standards. + version.c: Fix text of version string to match new standards. + +Sun Oct 6 22:19:45 1996 Arnold D. Robbins + + * regex.c: updated to Emacs 19.34b base. + +Sun Oct 6 21:57:34 1996 Arnold D. Robbins + + * re.c (make_regexp): fixed to handle \8 and \9 in the middle + of a regexp. + +Fri Oct 4 10:26:16 1996 Arnold D. Robbins + + * awk.y (snode): remove case for do_split; always making the + third arg a Node_regex is wrong. + field.c (do_split): rationalized to distinguish `/ /' from `" "'. + Generally fixed up. + * node.c (parse_escape): Allow single digit \x escapes. 
+ +1996-10-02 Paul Eggert + + * builtin.c (format_tree): + Fix bug in %d and %i format: NaNs, and values + in the range LONG_MAX+1 .. ULONG_MAX, were mishandled. + Don't assume that double values <= -1 are converted to unsigned + long in the expected way; the C Standard doesn't guarantee this. + +1996-10-02 Paul Eggert + + * awk.h (INT_MAX): Remove unused symbol. + +Mon Sep 30 22:19:11 1996 Arnold D. Robbins + + * getopt.c (_getopt_internal): If 'W' is in the optstring followed + by a ';' then search through the long opts table. This makes + `-W foo=bar' same as `--foo=bar'. + * main.c (main): 'W' now prints an error message. + (gawk_option): deleted the routine. + +Sun Sep 29 23:04:54 1996 Arnold D. Robbins + + * builtin.c (sub_common): fix several bugs with gsub when + matching null strings. See test/gsubtest.awk. + +Fri Sep 20 17:35:54 1996 Pat Rankin + + * alloca.c (NULL): don't define if has already done so. + +Fri Sep 20 11:54:31 1996 Arnold D. Robbins + + * builtin.c (do_print): evaluate all the expressions first and + then print them. Avoids surprising behavior. See test/prtoeval.awk + for an example. + +Tue Sep 10 06:21:40 1996 Arnold D. Robbins + + * awk.h [FUNC]: new flag, marks a Node_parameter_list as really + being the function name; allows more checking in awk.y. + * awk.y (isassignable): now takes a NODE * instead of a type, to + check if a function parameter is marked FUNC, then it's the function + name, which is not assignable. Fix call from snode(). + (function_prologue): mark function name as FUNC. + (yyerror): don't call exit() anymore; gawk will now report + all syntax errors. + +Sun Sep 1 19:36:30 1996 Arnold D. Robbins + + * field.c (rebuild_record): after building new field 0, go through + all old fields, and if they used to point into the old one, + have them point into the new one. Then turn off PERM flag before + unref-ing field 0. + +Wed Aug 28 19:13:34 1996 Arnold D. 
Robbins + + * eval.c (set_IGNORECASE): Correctly parenthesize bit operations + in test and fix logic for string value. + +Wed Aug 28 22:06:33 1996 Arnold D. Robbins + + * main.c (usage): add email addresses for bug reporting, per + change in GNU Coding Standards from RMS. + +Sun Aug 11 23:13:22 1996 Arnold D. Robbins + + * Makefile.in (install): correct use of $(INSTALL_PROGRAM). + +Thu Aug 8 23:29:43 1996 Arnold D. Robbins + + * parse.y (isassignable): new function, checks in type can + be assigned to. + (snode): changed checking for 3rd arg of gsub to be more + general, supersedes earlier change. + +Thu Aug 8 13:58:26 1996 Arnold D. Robbins + + * parse.y (snode): If third arg to sub or gsub is builtin + function, complain, since can't substitute into result. + * eval.c (r_get_lhs): diagnose Node_builtin as an error, instead + of falling through into default case and using cant_happen(). + +Thu Aug 1 07:13:14 1996 Arnold D. Robbins + + * regex.h [RE_DEBUG]: new macro. + [RE_SYNTAX_GNU_AWK]: add RE_DEBUG. + [RE_SYNTAX_POSIX_AWK]: add RE_INTERVALS. + * regex.c (re_set_syntax): add #ifdef DEBUG code to turn on `debug' + flag if RE_DEBUG set, and turn off debug if not set and debug + was on. + * main.c (main): remove `do_intervals = TRUE' from `if (do_posix)', + it's now handled in the definition of RE_SYNTAX_POSIX_AWK. + +Mon Jul 29 17:49:07 1996 Pat Rankin + + * io.c (O_ACCMODE): define it if doesn't. + +Mon Jul 29 12:02:48 1996 Arnold D. Robbins + + * eval.c (set_IGNORECASE): made somewhat smarter. gawk -v IGNORECASE=0 + was acting the same as -v IGNORECASE=1. Thanks to Darrell Hankerson + for the bug report. + +Fri Jul 26 12:04:43 1996 Arnold D. Robbins + + * awk.h (format_val): add declaration of new routine. + * node.c (format_val): new routine, abstracts old guts of + r_forcestring; accepts format string and index as additional params. + (r_force_string): changed to call format_val. 
+ * builtin.c (do_print): don't tree_eval the tree twice in case + OFMTidx != CONVFMTidx; doing so could cause side effects + (from bug report by Tobias Rettstadt, xassp@ipds.uni-kiel.de). + Instead, call format_val. + +Mon Jul 22 21:59:15 1996 Arnold D. Robbins + + * io.c (iop_close): change check for "is $0 in the input buffer" + to use `< (iop->buf + iop->secsiz + iop->size)' instead of + `< iop->end'. The latter is bogus if EOF has been hit on the + file. Fix from Darrel Hankerson based on bug report by + Charles Howes (howes@grid.direct.ca). See test/eofsplit.awk. + +Thu Jul 18 19:43:20 1996 Arnold D. Robbins + + * builtin.c (sub_common): backed out change of Feb 14 in favor of: + (do_gensub): Changed to use make_string and then to |= TEMP + flag, based on bug report and patch from Katsuyuki Okabe, + hgc02147@niftyserve.or.jp. + +Thu Jul 18 19:23:53 1996 Arnold D. Robbins + + * custom.h: added ifdef for QNX, based on bug report from + Michael Hunter, mphunter@qnx.com. + +Mon Jul 15 09:31:01 1996 Arnold D. Robbins + + * io.c (redirect): When finding the rp pointer, if it's not + NULL, set str = rp->value. This gets the '\0' terminated + version. Motivated by bug report from John Hawkinson + (jhawk@bbnplanet.com). + +Sun Jul 14 18:40:26 1996 Arnold D. Robbins + + * configure.in: added call to AC_CHECK_LIB(m, fmod), since + apparently some systems have fmod in the math library. + Portability: the Holy Grail. Sigh. + +Sun Jul 14 18:08:01 1996 Arnold D. Robbins + + * awk.h: add Jim Meyerings ISASCII etc hacks for ctype macros. + * builtin.c (do_toupper, do_tolower, sub_common): changed to use + upper-case versions of ctype macros. + * main.c (main): ditto. + * node.c (r_force_number, parse_escape): ditto. + +Sun Jul 14 06:34:18 1996 Arnold D. Robbins + + * field.c (set_record): made it always do the PERM flag. + Fixes cases where $0 is assigned to, e.g. by gsub, keeps + the fields valid. + (get_field): removed the call to reset_record in + case where ! 
field0_valid. We want to leave the fields alone + if they've been changed. + +Thu Jul 11 23:04:20 1996 Arnold D. Robbins + + * io.c (devopen): change tests of (flag & O_fooONLY) to + (flag & O_ACCMODE) == O_fooONLY. Per (long standing) bug + report from Chapman Flack. + (close_redir): change final conditional to just (status != 0) + so that ERRNO always set; the warning had its own `if (do_lint)' + anyway. + * eval.c (do_split): force type of array to be Node_var_array + instead of Node_var. Per (long standing) bug report from + Chapman Flack. + +Thu Jul 11 22:17:14 1996 Arnold D. Robbins + + * Makefile.in (install): added symlink of gawk to awk if + no awk in $(bindir). + (LN_S): new variable for symlinking. + (uninstall): remove awk if it's the same gawk. + * Configure.in: Added call to AC_PROG_LN_S for Makefile.in. + +Sun Jul 7 15:47:13 1996 Arnold D. Robbins + + * main.c (main): made `--posix' turn on interval expressions. + Gawk now matches its documentation. (What a concept!) + +Wed Jul 3 15:02:48 1996 Arnold D. Robbins + + * regex.h, regex.c: upgraded to changes from Emacs 19.31. + +Fri May 17 08:46:07 1996 Arnold D. Robbins + + * io.c (get_a_record): added `continued' flag. Fix from + Darrell Hankerson for when RS = "\n|something". + +Wed May 15 02:34:55 1996 Arnold D. Robbins + + * Makefile.in (awklib/all): now depends on gawk, fixes problem + with parallel make. + +Tue May 14 15:02:52 1996 Arnold D. Robbins + + * builtin.c (format_tree): fix handling of '*' to deal with + negative value for fieldwidth -- make positive and turn on + left justify. Per bug report from Michael Brennan. + +Sun May 12 20:42:06 1996 Arnold D. Robbins + + * eval.c (r_get_lhs): case Node_subscript. Check if array name + is actually a function, fatal error if so. + +Sun May 5 10:11:52 1996 Arnold D. Robbins + + * io.c (redirect): call flush_io() before creating a new output pipe, + per bug report from Brian Kernighan (bwk@research.bell-labs.com). 
+ +Fri Mar 15 06:38:33 1996 Arnold D. Robbins + + * Makefile.in (install): use $(INSTALL_PROGRAM), not $(INSTALL). + (local-distclean): add `*~' to list of files to be removed. + (CFLAGS): now contains just @CFLAGS@. + (COMPFLAGS): replaces use of CFLAGS, has CFLAGS plus all the + other stuff. + +Wed Mar 13 14:19:38 1996 Arnold D. Robbins + + * io.c (mmap_get_record): fixed to not place sentinel at end + of mmap'ed object. Won't work if file is exact multiple of + disk block size. See comments in code for more info. + Thanks to Rick Adams (rick@uunet.uu.net) for help in testing. + +Sun Mar 10 22:50:23 1996 Arnold D. Robbins + + * io.c (do_close): notice if we were called as `close(FILENAME)' + and arrange to close the current input file. This turns out + to be easy to do, just call `nextfile(TRUE)'. Based on bug report + from Pascal A. Dupuis, . + +Thu Mar 7 08:08:51 1996 Arnold D. Robbins + + * field.c (init_fields, grow_fields, set_field, rebuild_record): + Nuke the `nodes' array everywhere. Anytime a field is unref'ed, + allocate a new node that is a copy of Nnull_string. This avoids + subtle memory management problems when doing a lot of assignment + to fields, and tweaking of NF. Make sure that fields_arr[0] always + has a type of Node_val! + * field.c (set_NF): If NF is decremented, clear fields between + NF and parse_high_water, otherwise if NF incremented, clear + fields between parse_high_water and NF. + * eval.c (nodetype2str): new function, used for diagnostics. + eval.c (interpret): use nodetype2str when finding invalid node. + +Mon Mar 4 09:02:28 1996 Arnold D. Robbins + + * builtin.c (do_toupper, do_tolower): use isascii along with + isupper/islower before changing case, in case characters have + the high bit set. This is a hack. + +Mon Feb 26 22:24:44 1996 Arnold D. Robbins + + * builtin.c (sub_common): if no match, and called from gensub, + don't free the temporary string, since the tmp_number then + writes over it. 
+ +Sun Feb 25 23:13:01 1996 Arnold D. Robbins + + * builtin.c (format_tree): fixed %c to treat user input as + numeric also by adding test for MAYBE_NUM. + +Tue Feb 20 12:25:50 1996 Arnold D. Robbins + + * configure.in: Added AC_FUNC_MMAP call and add madvise to + list of functions to look for. + * awk.h [IOP_ISMAPPED]: new flag value for mmap support and new + `getrec' structure member in struct iobuf. + * io.c (iop_alloc, iop_close): changed to map/unmap input file + into memory if possible. + (mmap_get_record): new function to actually retrieve the + record from mmaped file. + +Thu Feb 1 08:56:46 1996 Arnold D. Robbins + + * builtin.c (do_substr): fixed lint message to use indx+1 when + start position is past end of string. + +Sun Jan 28 07:00:56 1996 Arnold D. Robbins + + * builtin.c (do_substr): rationalized handling of missing length + argument, as well as various accompagnying lint warnings. Previous + code was slightly bogus. Talk about your Day 1 bugs. + +Thu Jan 25 14:09:11 1996 Arnold D. Robbins + + * builtin.c (do_substr): if length exceeds length of actual + string, do computation of needed substring length *after* + the lint warning. + +Wed Jan 24 10:06:16 1996 Arnold D. Robbins + + * Makefile.in (gawk): Add $(CFLAGS) to link line. + (Makefile): target depends on the Makefile.in files. + (OTHERS): Added TAGS and tags to the distribution. + (local-distclean): New rule. + (distclean): Use it. + (maintainer-clean): Don't `make distclean' before running submakes, + since that removes makefiles needed for the submakes. + * builtin.c (do_strftime): Remove hard coded limit on length of result. + Based on code from Paul Eggert (eggert@twinsun.com). + +Mon Jan 22 13:16:37 1996 Arnold D. Robbins + + * main.c (usage): takes new fp parameter which is either + stdout for `--help' (per the GNU Coding Standards) or stderr + if an error occurs. Fix all calls. + (version): prints to stdout per the coding stds. + (copyleft): prints to stdout now, not stderr, and exits. 
+ +Fri Jan 19 08:10:29 1996 Arnold D. Robbins + + * regex.h [RE_GNU_AWK]: added RE_CONTEXT_INDEP_OPS to set of + bits we turn off for regular operation. Breaks things like + /^+[0-9]+/ to match a literal `+' at the beginning of, say, + a phone number. + +Wed Jan 10 23:19:36 1996 Arnold D. Robbins + + * 3.0.0 polished up and release tar file made. + +Wed Dec 27 11:46:16 1995 Arnold D. Robbins + + * 2.94.0 released to porting group (no, I haven't been good + about this file; I'll do better once 3.0 is released). + +Mon Aug 28 23:04:30 1995 Arnold D. Robbins + + * awk.h updated for NeXT - bracket TRUE/FALSE + * io.c (get_a_record): removed shadowing of 'start' in + * Makefile.in and doc/Makefile.in: fixed to use gawk.1 and gawk.texi, + instead of gawk.1.in and gawk.texi.in. + +Mon Aug 25 11:04:30 1995 Arnold D. Robbins + + * 2.90.0 released to porting group. + +Fri Aug 18 12:43:31 1995 Arnold D. Robbins + + * ChangeLog created. diff --git a/contrib/awk/FREEBSD-upgrade b/contrib/awk/FREEBSD-upgrade new file mode 100644 index 0000000..d2add40f --- /dev/null +++ b/contrib/awk/FREEBSD-upgrade @@ -0,0 +1,60 @@ + +Import of GNU awk 3.0.3 + +Original source available as ftp://prep.ai.mit.edu/pub/gnu/gawk-3.0.3.tar.gz + +The following files and directories were removed for this import: + +Makefile.in +README_d/README.VMS +README_d/README.atari +README_d/README.irix +README_d/README.linux +README_d/README.pc +README_d/README.sco +README_d/README.sgi +README_d/README.solaris +README_d/README.sony +README_d/README.sunos4 +README_d/README.ultrix +README_d/README.yacc +aclocal.m4 +alloca.c +atari/ +awklib/ +awktab.c +configh.in +configure +configure.in +doc/Makefile.in +doc/README.card +doc/ad.block +doc/awkcard.in +doc/awkforai.txt +doc/cardfonts +doc/colors +doc/igawk.1 +doc/macros +doc/no.colors +doc/setter.outline +doc/texinfo.tex +install-sh +missing/ +mkinstalldirs +pc/ +protos.h +regex.c +stamp-h.in +vms/ + +In addition, doc/gawk.1 and doc/gawk.texi were renamed to awk.1 
and awk.texi. + +The test sub-directory has been left in, as, although not necessary to build +awk on FreeBSD, it will be useful to anyone changing the code. To use it, +do something like + +cd /usr/src/contrib/awk +ln -s /path/to/new/awk gawk +cd test && make + +jraynard@freebsd.org 26 Sept 1997 diff --git a/contrib/awk/FUTURES b/contrib/awk/FUTURES new file mode 100644 index 0000000..13a312c --- /dev/null +++ b/contrib/awk/FUTURES @@ -0,0 +1,108 @@ +This file lists future projects and enhancements for gawk. Items are listed +in roughly the order they will be done for a given release. This file is +mainly for use by the developers to help keep themselves on track, please +don't bug us too much about schedules or what all this really means. + +With the 3.0 release, we are acknowledging that awk is not PERL, nor should +it become PERL. (To paraphrase Dennis Ritchie, "If you want PERL, you +know where to get it.") + +The focus on the future is thus narrowed to performance and functional +enhancements, with only minor plans for significant new features. + +For 3.0 +======= + DONE: Move to autoconf-based configure system. + + DONE: Allow RS to be a regexp. + + DONE: RT variable to hold text of record terminator + + DONE: split() with null string as third arg to split up strings + + DONE: Analogously, setting FS="" would split the input record into + individual characters. + + DONE: Generalize IGNORECASE + - any value makes it work, not just numeric non-zero + - make it apply to *all* string comparisons + + DONE: Incorporate newer dfa.c and regex.c + + DONE: Go to POSIX regexps + + DONE: Make regex + dfa less dependant on gawk header file includes + + DONE: Source code formatting cleaned up and regularized + + DONE: Clean up code by isolating system-specific functions in + separate files. + + DONE: General sub function: + gensub(pat, sub, global_flag[, line]) + that return the substituted strings and allow \1 etc. + in the sub string. 
+ + DONE: Add AWKPATH to ENVIRON if it's not there + + DONE: Undertake significant directory reorganization. + + DONE: Extensive manual cleanup: + Use of texinfo 2.0 features + Lots more examples + Document posix regexps + Document all of the above. + +In 3.1 +====== + A PROCINFO array to replace /dev/pid, /dev/user, et al. + + DONE: Use mmap to read input files on systems that support it. + + Add `abort' statement a la Thompson awk. + + Consider removing use of and/or need for the protos.h file. + + Use a new or improved dfa. + + Integrate GNU NLS support. + + Bring out hooks for NLS support into gawk itself. + + DBM storage of awk arrays. Try to allow multiple dbm packages. + + Use GNU malloc. + + Use rx instead of regex. + + DONE: Do a reference card. + + ? Have strftime() pay attention to the value of ENVIRON["TZ"] + + Additional manual features: + Document use of dbm arrays + Document NLS support + ? Add exercises + ? Add an error messages section to the manual + ? A section on where gawk is bounded + regex + i/o + sun fp conversions + +For 3.2 +======= + Add a lint check if the return value of a function is used but + the function did not supply a value. + + Do an optimization pass over parse tree? + + Make awk '/foo/' files... run at egrep speeds + +For 4.x: +======== + +Create a gawk compiler? + +Create a gawk-to-C translator? (or C++??) + +Provide awk profiling and debugging. diff --git a/contrib/awk/INSTALL b/contrib/awk/INSTALL new file mode 100644 index 0000000..a2c8722 --- /dev/null +++ b/contrib/awk/INSTALL @@ -0,0 +1,181 @@ +Basic Installation +================== + + These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. 
Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, a file +`config.cache' that saves the results of its tests to speed up +reconfiguring, and a file `config.log' containing compiler output +(useful mainly for debugging `configure'). + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If at some point `config.cache' +contains results you don't want to keep, you may remove or edit it. + + The file `configure.in' is used to create `configure' by a program +called `autoconf'. You only need `configure.in' if you want to change +it or regenerate `configure' using a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. 
If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. You can give `configure' +initial values for variables by setting them in the environment. Using +a Bourne-compatible shell, you can do that on the command line like +this: + CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure + +Or on systems that have the `env' program, you can do it like this: + env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a time +in the source code directory. After you have installed the package for +one architecture, use `make distclean' before reconfiguring for another +architecture. + +Installation Names +================== + + By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PATH'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. 
If you +give `configure' the option `--exec-prefix=PATH', the package will use +PATH as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=PATH' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + + There may be some features `configure' can not figure out +automatically, but needs to determine by the type of host the package +will run on. Usually `configure' can figure that out, but if it prints +a message saying it can not guess the host type, give it the +`--host=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name with three fields: + CPU-COMPANY-SYSTEM + +See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the host type. 
+ + If you are building compiler tools for cross-compiling, you can also +use the `--target=TYPE' option to select the type of system they will +produce code for and the `--build=TYPE' option to select the type of +system on which you are compiling the package. + +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Operation Controls +================== + + `configure' recognizes the following options to control how it +operates. + +`--cache-file=FILE' + Use and save the results of the tests in FILE instead of + `./config.cache'. Set FILE to `/dev/null' to disable caching, for + debugging `configure'. + +`--help' + Print a summary of the options to `configure', and exit. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`--version' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`configure' also accepts some other, not widely useful, options. + diff --git a/contrib/awk/LIMITATIONS b/contrib/awk/LIMITATIONS new file mode 100644 index 0000000..05e8bc4 --- /dev/null +++ b/contrib/awk/LIMITATIONS @@ -0,0 +1,16 @@ +This file describes limits of gawk on a Unix system (although it +is variable even then). Non-Unix systems may have other limits. 
+ +# of fields in a record: MAX_LONG +Length of input record: MAX_INT +Length of output record: unlimited +Size of a field: MAX_INT +Size of a printf string: MAX_INT +Size of a literal string: MAX_INT +Characters in a character class: 2^(# of bits per byte) +# of file redirections: unlimited +# of pipe redirections: min(# of processes per user, # of open files) +double-precision floating point +Length of source line: unlimited +Number of input records in one file: MAX_LONG +Number of input records total: MAX_LONG diff --git a/contrib/awk/NEWS b/contrib/awk/NEWS new file mode 100644 index 0000000..2a3e7fe --- /dev/null +++ b/contrib/awk/NEWS @@ -0,0 +1,1802 @@ +Changes from 3.0.2 to 3.0.3 +--------------------------- + +The horrendous per-record memory leak introduced in 3.0.1 is gone, finally. + +The `amiga' directory is now gone; Amiga support is now entirely handled +by the POSIX support. + +Win32 support has been added in the `pc' directory. See `README_d/README.pc' +for more info. + +The mmap changes are disabled in io.c, and will be removed entirely +in the next big release. They were an interesting experiment that just +really didn't work in practice. + +A minor memory leak that occurred when using `next' from within a +function has also been fixed. + +Problems with I/O from sub-processes via a pipe are now gone. + +Using "/dev/pid" and the other special /dev files no longer causes a core dump. + +The files regex.h, regex.c, getopt.h, getopt.c, and getopt1.c have been +merged with the versions in GNU libc. Thanks to Ulrich Drepper for his help. + +Some new undocumented features have been added. Use the source, Luke! +It is not clear yet whether these will ever be fully supported. + +Array performance should be much better for very very large arrays. "Virtual +memory required, real memory helpful." + +builtin.c:do_substr rationalized, again. + +The --re-interval option now works as advertised. + +The license text on some of the missing/* files is now generic. 
+ +Lots more new test cases. + +Lots of other small bugs fixed, see the ChangeLog files for details. + +Changes from 3.0.1 to 3.0.2 +--------------------------- + +Gawk now uses autoconf 2.12. + +strftime now behaves correctly if passed an empty format string or if +the string formats to an empty result string. + +Several minor compilation and installation problems have been fixed. + +Minor page break issues in the user's guide have been fixed. + +Lexical errors no longer repeat ad infinitum. + +Changes from 3.0.0 to 3.0.1 +--------------------------- + +Troff source for a handy-dandy five color reference card is now provided. +Thanks to SSC for their macros. + +Gawk now behaves like Unix awk and mawk, in that newline acts as white +space for separating fields and for split(), by default. In posix mode, +only space and tab separate fields. The documentation has been updated to +reflect this. + +Tons and tons of small bugs fixed and new tests added, see the ChangeLogs. + +Lots fewer compile time warnings from gcc -Wall. Remaining ones aren't +worth fixing. + +Gawk now pays some attention to the locale settings. + +Fixes to gsub to catch several corner cases. + +The `print' statement now evaluates all expressions first, and then +prints them. This leads to less suprising behaviour if any expression has +output side effects. + +Miscellanious improvements in regex.h and regex.c. + +Gawk will now install itself as gawk-M.N.P in $(bindir), and link +`gawk' to it. This makes it easy to have multiple versions of gawk +simultaneously. It will also now install itself as `awk' in $(bindir) +if there is no `awk' there. This is in addition to installing itself as +`gawk'. This change benefits the Hurd, and possibly other systems. One +day, gawk will drop the `g', but not yet. + +`--posix' turns on interval expressions. Gawk now matches its documentation. + +`close(FILENAME)' now does something meaningful. + +Field management code in field.c majorly overhauled, several times. 
+ +The gensub code has been fixed, several bugs are now gone. + +Gawk will use mmap for data file input if it is available. + +The printf/sprintf code has been improved. + +Minor issues in Makefile setup worked on and improved. + +builtin.c:do_substr rationalized. + +Regex matching fixed so that /+[0-9]/ now matches the leading +. + +For building on vms, the default compiler is now DEC C rather than VAX C. + +Changes from 2.15.6 to 3.0.0 +---------------------------- + +Fixed spelling of `Programming' in the copyright notice in all the files. + +New --re-interval option to turn on interval expressions. They're off +by default, except for --posix, to avoid breaking old programs. + +Passing regexp constants as parameters to user defined functions now +generates a lint warning. + +Several obscure regexp bugs fixed; alas, a small number remain. + +The manual has been thoroughly revised. It's now almost 50% bigger than +it used to be. + +The `+' modifier in printf is now reset correctly for each item. + +The do_unix variable is now named do_traditional. + +Handling of \ in sub and gsub rationalized (somewhat, see the manual for +the gory [and I do mean gory] details). + +IGNORECASE now uses ISO 8859-1 Latin-1 instead of straight ASCII. See the +source for how to revert to pure ASCII. + +--lint will now warn if an assignment occurs in a conditional context. +This may become obnoxious enough to need turning off in the future, but +"it seemed like a good idea at the time." + +%hf and %Lf are now diagnosed as invalid in printf, just like %lf. + +Gawk no longer incorrectly closes stdin in child processes used in +input pipelines. + +For integer formats, gawk now correctly treats the precision as the +number of digits to print, not the number of characters. + +gawk is now much better at catching the use of scalar values when +arrays are needed, both in function calls and the `x in y' constructs. + +New gensub function added. See the manual. 
+ +If do_tradtional is true, octal and hex escapes in regexp constants are +treated literally. This matches historical behavior. + +yylex/nextc fixed so that even null characters can be included +in the source code. + +do_format now handles cases where a format specifier doesn't end in +a control letter. --lint reports an error. + +strftime() now uses a default time format equivalent to that of the +Unix date command, thus it can be called with no arguments. + +Gawk now catches functions that are used but not defined at parse time +instead of at run time. (This is a lint error, making it fatal could break +old code.) + +Arrays that max out are now handled correctly. + +Integer formats outside the range of an unsigned long are now detected +correctly using the SunOS 4.x cc compiler. + +--traditional option added as new preferred name for --compat, in keeping +with GCC. + +--lint-old option added, so that warnings about things not in old awk +are only given if explicitly asked for. + +`next file' has changed to one word, `nextfile'. `next file' is still +accepted but generates a lint warning. `next file' will go away eventually. + +Gawk with --lint will now notice empty source files and empty data files. + +Amiga support using the Unix emulation added. Thanks to fnf@ninemoons.com. + +test/Makefile is now "parallel-make safe". + +Gawk now uses POSIX regexps + GNU regex ops by default. --posix goes to +pure posix regexps, and --compat goes to traditional Unix regexps. However, +interval expressions, even though specified by POSIX, are turned off by +default, to avoid breaking old code. + +IGNORECASE now applies to string comparison as well as regexp operations. + +The AT&T Bell Labs Research awk fflush builtin function is now supported. +fflush is extended to flush stdout if no arg and everything if given +the null string as an argument. + +If RS is more than one character, it is treated as a regular expression +and records are delimited accordingly. 
The variable RT is set to the record +terminator string. This is disabled in compatibility mode. + +If FS is set to the null string (or the third arg. of split() is the null +string), splitting is done at every single character. This is disabled in +compatibility mode. + +Gawk now uses the Autoconf generated configure script, doing away with all +the config/* files and the machinery that went with them. The Makefile.in +has also changed accordingly, complete with all the standard GNU Makefile +targets. (Non-unix systems may still have their own config.h and Makefile; +see the appropriate README_d/README.* and/or subdirectory.) + +The source code has been cleaned up somewhat and the formatting improved. + +Changes from 2.15.5 to 2.15.6 +----------------------------- + +Copyrights updated on all changed files. + +test directory enhanced with four new tests. + +Gawk now generates a warning for \x without following hexadecimal digits. +In this case, it returns 'x', not \0. + +Several fixes in main.c related to variable initialization: + CONVFMT has a default value + resetup is called before initializing variables + the varinit table fixed up a bit (see the comments) + +gawk.1 updated with new BUG REPORTS section. + +A plain `print' inside a BEGIN or END now generates a lint warning (awk.y). + +Small fix in iop.c:get_a_record to avoid reading uninitialized memory. + +awk.y:yylex now does a better job of handling things if the source file +does not end in a newline. Probably there is more work to be done. + +Memory leaks fixed in awk.y, particularly in cases of duplicate function +parameters. Also, calling a function doesn't leak memory during parsing. + +Empty function bodies are now allowed (awk.y). + +Gawk now detects duplicate parameter names in functions (awk.y). + +New function `error' in msg.c added for use from awk.y. + +eval.c:r_get_lhs now checks if its argument is a parameter on the stack, +and pulls down the real variable. 
This catches more 'using an array as +a scalar' kinds of errors. + +main.c recovers C alloca space after parsing, this is important for +bison-based parsers. re.c recovers C alloca space after doing an research. +[Changes from Pat Rankin] + +builtin.c now declares the random() related functions based on +RANDOM_MISSING from config.h. [Suggested by Pat Rankin] + +awk.h now handles alloca correctly for HP-UX. [Kaveh Ghazi] + +regex.h and config/cray60 updated for Unicos 8.0. [Hal Peterson] + +Fixed re.c and dfa.c so that gawk no longer leaks memory when using +lots of dynamic regexps. + +Removed dependency on signed chars from `idx' variable in awk.h. Gawk +now passes its test suite if compiled with `gcc -fno-signed-char'. + +Fixed warning on close in io.c to go under lint control. Too many people +have complained about the spurious message, particularly when closing a +child pipeline early. + +Gawk now correctly handles RS = "" when input is from a terminal +(iop.c:get_a_record). + +Config file added for GNU. + +gawk 'BEGIN { exit 1 } ; END { exit }' now exits 1, as it should +(eval.c:interpret). + +sub and gsub now follow posix, \ escapes both & and \. Each \ must +be doubled initially in the program to get it into the string. +Thanks to Mike Brennan for pointing this out (builtin.c:sub_common). + +If FS is "", gawk behaves like mawk and nawk, making the whole record be $1. +Yet Another Dark Corner. Sigh (field.c:def_parse_field). + +Gawk now correctly recomputes string values for numbers if CONVFMT has +changed (awk.h:force_string, node.c:r_force_string). + +A regexp of the form `/* this looks like a comment but is not */' will +now generate a warning from --lint (awk.y). + +Gawk will no longer core dump if given an empty input file (awk.y:get_src_buf, +iop.c:optimal_bufsize). + +A printf format of the form %lf is handled correctly. The `l' generates +a lint warning (builtin.c:format_tree) [Thanks to Mark Moraes]. + +Lynxos config file added. 
+ +`continue' outside a loop treated as `next' only in compatibility mode, +instead of by default; recent att nawk chokes on this now. `break' +outside a loop now treated as `next' in compatibility mode (eval.c). + +Bug fix in string concatenation, an arbitrary number of expressions +are allowed (eval.c). + +$1 += $2 now works correctly (eval.c). + +Changing IGNORECASE no longer resets field-splitting to FS if it was +using FIELDWIDTHS (eval.c, field.c). + +Major enhancement: $0 and NF for last record read are now preserved +into the END rule (io.c). + +Regexp fixes: + /./ now matches a newline (regex.h) + ^ and $ match beginning and end of string only, not any embedded + newlines (re.c) + regex.c should compile and work ok on 64-bit mips/sgi machines + +Changes from 2.15.4 to 2.15.5 +----------------------------- + +FUTURES file updated and re-arranged some with more rational schedule. + +Many prototypes handled better for ANSI C in protos.h. + +getopt.c updated somewhat. + +test/Makefile now removes junk directory, `bardargtest' renamed `badargs.' + +Bug fix in iop.c for RS = "". Eat trailing newlines off of record separator. + +Bug fix in Makefile.bsd44, use leading tab in actions. + +Fix in field.c:set_FS for FS == "\\" and IGNORECASE != 0. + +Config files updated or added: + cray60, DEC OSF/1 2.0, Utek, sgi405, next21, next30, atari/config.h, + sco. + +Fix in io.c for ENFILE as well as EMFILE, update decl of groupset to +include OSF/1. + +Rationalized printing as integers if numbers are outside the range of a long. +Changes to node.c:force_string and builtin.c. + +Made internal NF, NR, and FNR variables longs instead of ints. + +Add LIMITS_H_MISSING stuff to config.in and awk.h, and default defs for +INT_MAX and LONG_MAX, if no limits.h file. Add a standard decl of +the time() function for __STDC__. From ghazi@noc.rutgers.edu. + +Fix tree_eval in awk.h and r_tree_eval in eval.c to deal better with +function parameters, particularly ones that are arrays. 
+ +Fix eval.c to print out array names of arrays used in scalar contexts. + +Fix eval.c in interpret to zero out source and sourceline initially. This +does a better job of providing source file and line number information. + +Fix to re_parse_field in field.c to not use isspace when RS = "", but rather +to explicitly look for blank and tab. + +Fix to sc_parse_field in field.c to catch the case of the FS character at the +end of a record. + +Lots of miscellanious bug fixes for memory leaks, courtesy Mark Moraes, +also fixes for arrays. + +io.c fixed to warn about lack of explicit closes if --lint. + +Updated missing/strftime.c to match posted strftime 6.2. + +Bug fix in builtin.c, in case of non-match in sub_common. + +Updated constant used for division in builtin.c:do_rand for DEC Alpha +and CRAY Y-MP. + +POSIXLY_CORRECT in the environment turns on --posix (fixed in main.c). + +Updated srandom prototype and calls in builtin.c. + +Fix awk.y to enforce posix semantics of unary +: result is numeric. + +Fix array.c to not rearrange the hash chain upon finding an index in +the array. This messed things up in cases like: + for (index1 in array) { + blah + if (index2 in array) # blew away the for + stuff + } + +Fixed spelling errors in the man page. + +Fixes in awk.y so that + gawk '' /path/to/file +will work without core dumping or finding parse errors. + +Fix main.c so that --lint will fuss about an empty program. +Yet another fix for argument parsing in the case of unrecognized options. + +Bug fix in dfa.c to not attempt to free null pointers. + +Bug fix in builtin.c to only use DEFAULT_G_PRECISION for %g or %G. + +Bug fix in field.c to achieve call by value semantics for split. + +Changes from 2.15.3 to 2.15.4 +----------------------------- + +Lots of lint fixes, and do_sprintf made mostly ANSI C compatible. + +Man page updated and edited. + +Copyrights updated. 
+ +Arrays now grow dynamically, initially scaling up by an order of magnitude + and then doubling, up to ~ 64K. This should keep gawk's performance + graceful under heavy load. + +New `delete array' feature added. Only documented in the man page. + +Switched to dfa and regex suites from grep-2.0. These offer the ability to + move to POSIX regexps in the next release. + +Disabled GNU regex ops. + +Research awk -m option now recognized. It does nothing in gawk, since gawk + has no static limits. Only documented in the man page. + +New bionic (faster, better, stronger than before) hashing function. + +Bug fix in argument handling. `gawk -X' now notices there was no program. + Additional bug fixes to make --compat and --lint work again. + +Many changes for systems where sizeof(int) != sizeof(void *). + +Add explicit alloca(0) in io.c to recover space from C alloca. + +Fixed file descriptor leak in io.c. + +The --version option now follows the GNU coding standards and exits. + +Fixed several prototypes in protos.h. + +Several tests updated. On Solaris, warn that the out? tests will fail. + +Configuration files for SunOS with cc and Solaris 2.x added. + +Improved error messages in awk.y on gawk extensions if do_unix or do_compat. + +INSTALL file added. + +Fixed Atari Makefile and several VMS specific changes. + +Better conversion of numbers to strings on systems with broken sprintfs. + +Changes from 2.15.2 to 2.15.3 +----------------------------- + +Increased HASHSIZE to a decent number, 127 was way too small. + +FILENAME is now the null string in a BEGIN rule. + +Argument processing fixed for invalid options and missing arguments. + +This version will build on VMS. This included a fix to close all files + and pipes opened with redirections before closing stdout and stderr. + +More getpgrp() defines. + +Changes for BSD44: in io.c and Makefile.bsd44. + +All directories in the distribution are now writable. + +Separated LDFLAGS and CFLAGS in Makefile. 
CFLAGS can now be overridden by + user. + +Make dist now builds compressed archives ending in .gz and runs doschk. + +Amiga port. + +New getopt.c fixes Alpha OSF/1 problem. + +Make clean now removes possible test output. + +Improved algorithm for multiple adjacent string concatenations leads to + performance improvements. + +Fix nasty bug whereby command-line assignments, both with -v and at run time, + could create variables with syntactically illegal names. + +Fix obscure bug in printf with %0 flag and filling. + +Add a lint check for substr if provided length exceeds remaining characters + in string. + +Update atari support. + +PC support enhanced to include support for both DOS and OS/2. (Lots more + #ifdefs. Sigh.) + +Config files for Hitachi Unix and OSF/1, courtesy of Yoko Morishita + (morisita@sra.co.jp) + +Changes from 2.15.1 to 2.15.2 +----------------------------- + +Additions to the FUTURES file. + +Document undefined order of output when using both standard output + and /dev/stdout or any of the /dev output files that gawk emulates in + the absence of OS support. + +Clean up the distribution generation in Makefile.in: the info files are + now included, the distributed files are marked read-only and patched + distributions are now unpacked in a directory named with the patch level. + +Changes from 2.15 to 2.15.1 +--------------------------- + +Close stdout and stderr before all redirections on program exit. This allows + detection of write errors and also fixes the messages test on Solaris 2.x. + +Removed YYMAXDEPTH define in awk.y which was limiting the parser stack depth. + +Changes to config/bsd44, Makefile.bsd44 and configure to bring it into line + with the BSD4.4 release. + +Changed Makefile to use prefix, exec_prefix, bindir etc. + +make install now installs info files. + +make install now sets permissions on installed files. + +Make targets added: uninstall, distclean, mostlyclean and realclean. 
+ +Added config.h to cleaner and clobber make targets. + +Changes to config/{hpux8x,sysv3,sysv4,ultrix41} to deal with alloca(). + +Change to getopt.h for portability. + +Added more special cases to the getpgrp() call. + +Added README.ibmrt-aos and config/ibmrt-aos. + +Changes from 2.14 to 2.15 +--------------------------- + +Command-line source can now be mixed with library functions. + +ARGIND variable tracks index in ARGV of FILENAME. + +GNU style long options in addition to short options. + +Plan 9 style special files interpreted by gawk: + /dev/pid + /dev/ppid + /dev/pgrpid + /dev/user + $1 = getuid + $2 = geteuid + $3 = getgid + $4 = getegid + $5 ... $NF = getgroups if supported + +ERRNO variable contains error string if getline or close fails. + +Very old options -a and -e have gone away. + +Inftest has been removed from the default target in test/Makefile -- the + results were too machine specific and resulted in too many false alarms. + +A README.amiga has been added. + +The "too many arguments supplied for format string" warning message is only + in effect under the lint option. + +Code improvements in dfa.c. + +Fixed all reported bugs: + + Writes are checked for failure (such as full filesystem). + + Stopped (at least some) runaway error messages. + + gsub(/^/, "x") does the right thing for $0 of 0, 1, or more length. + + close() on a command being piped to a getline now works properly. + + The input record will no longer be freed upon an explicit close() + of the input file. + + A NUL character in FS now works. + + In a substitute, \\& now means a literal backslash followed by what + was matched. + + Integer overflow of substring length in substr() is caught. + + An input record without a newline termination is handled properly. + + In io.c, check is against only EMFILE so that system file table + is not filled. + + Renamed all files with names longer than 14 characters. 
+ + Escaped characters in regular expressions were being lost when + IGNORECASE was used. + + Long source lines were not being handled properly. + + Sourcefiles that ended in a tab but no newline were bombing. + + Patterns that could match zero characters in split() were not working + properly. + + The parsedebug option was not working. + + The grammar was being a bit too lenient, allowing some very dubious + programs to pass. + + Compilation with DEBUG defined now works. + + A variable read in with getline was not being treated as a potential + number. + + Array subscripts were not always of string type. + + +Changes from 2.13.2 to 2.14 +--------------------------- + +Updated manual! + +Added "next file" to skip efficiently to the next input file. + +Fixed potential of overflowing buffer in do_sprintf(). + +Plugged small memory leak in sub_common(). + +EOF on a redirect is now "sticky" -- it can only be cleared by close()ing + the pipe or file. + +Now works if used via a #! /bin/gawk line at the top of an executable file + when that line ends with whitespace. + +Added some checks to the grammar to catch redefinition of builtin functions. + This could eventually be the basis for an extension to allow redefining + functions, but in the mean time it's a good error catching facility. + +Negative integer exponents now work. + +Modified do_system() to make sure it had a non-null string to be passed + to system(3). Thus, system("") will flush any pending output but not go + through the overhead of forking an un-needed shell. + +A fix to floating point comparisons so that NaNs compare right on IEEE systems. + +Added code to make sure we're not opening directories for reading and such. + +Added code to do better diagnoses of weird or null file names. + +Allow continue outside of a loop, unless in strict posix mode. Lint option + will issue warning. + +New missing/strftime.c. There has been one change that affects gawk. 
Posix + now defines a %V conversion so the vms conversion has been changed to %v. + If this version is used with gawk -Wlint and they use %V in a call to + strftime, they'll get a warning. + +Error messages now conform to GNU standard (I hope). + +Changed comparisons to conform to the description found in the file POSIX. + This is inconsistent with the current POSIX draft, but that is broken. + Hopefully the final POSIX standard will conform to this version. + (Alas, this will have to wait for 1003.2b, which will be a revision to + the 1003.2 standard. That standard has been frozen with the broken + comparison rules.) + +The length of a string was a short and now is a size_t. + +Updated VMS help. + +Added quite a few new tests to the test suite and deleted many due to lack of + written releases. Test output is only removed if it is identical to the + "good" output. + +Fixed a couple of bugs for reference to $0 when $0 is "" -- particularly in + a BEGIN block. + +Fixed premature freeing in construct "$0 = $0". + +Removed the call to wait_any() in gawk_popen(), since on at least some systems, + if gawk's input was from a pipe, the predecessor process in the pipe was a + child of gawk and this caused a deadlock. + +Regexp can (once again) match a newline, if given explicitly. + +nextopen() makes sure file name is null terminated. + +Fixed VMS pipe simulation. Improved VMS I/O performance. + +Catch . used in variable names. + +Fixed bug in getline without redirect from a file -- it was quitting after the + first EOF, rather than trying the next file. + +Fixed bug in treatment of backslash at the end of a string -- it was bombing + rather than doing something sensible. It is not clear what this should mean, + but for now I issue a warning and take it as a literal backslash. 
+ +Moved setting of regexp syntax to before the option parsing in main(), to + handle things like -v FS='[.,;]' + +Fixed bug when NF is set by user -- fields_arr must be expanded if necessary + and "new" fields must be initialized. + +Fixed several bugs in [g]sub() for no match found or the match is 0-length. + +Fixed bug where in gsub() a pattern anchored at the beginning would still + substitute throughout the string. + +make test does not assume that . is in PATH. + +Fixed bug when a field beyond the end of the record was requested after + $0 was altered (directly or indirectly). + +Fixed bug for assignment to field beyond end of record -- the assigned value + was not found on subsequent reference to that field. + +Fixed bug for FS a regexp and it matches at the end of a record. + +Fixed memory leak for an array local to a function. + +Fixed hanging of pipe redirection to getline + +Fixed coredump on access to $0 inside BEGIN block. + +Fixed treatment of RS = "". It now parses the fields correctly and strips + leading whitespace from a record if FS is a space. + +Fixed faking of /dev/stdin. + +Fixed problem with x += x + +Use of scalar as array and vice versa is now detected. + +IGNORECASE now obeyed for FS (even if FS is a single alphabetic character). + +Switch to GPL version 2. + +Renamed awk.tab.c to awktab.c for MSDOS and VMS tar programs. + +Renamed this file (CHANGES) to NEWS. + +Use fmod() instead of modf() and provide FMOD_MISSING #define to undo + this change. + +Correct the volatile declarations in eval.c. + +Avoid errant closing of the file descriptors for stdin, stdout and stderr. + +Be more flexible about where semi-colons can occur in programs. + +Check for write errors on all output, not just on close(). + +Eliminate the need for missing/{strtol.c,vprintf.c}. + +Use GNU getopt and eliminate missing/getopt.c. + +More "lint" checking. 
+ + +Changes from 2.13.1 to 2.13.2 +----------------------------- + +Toward conformity with GNU standards, configure is a link to mkconf, the latter + to disappear in the next major release. + +Update to config/bsd43. + +Added config/apollo, config/msc60, config/cray2-50, config/interactive2.2 + +sgi33.cc added for compilation using cc rather than gcc. + +Ultrix41 now propagates to config.h properly -- as part of a general + mechanism in configure for kludges -- #define anything from a config file + just gets tacked onto the end of config.h -- to be used sparingly. + +Got rid of an unnecessary and troublesome declaration of vprintf(). + +Small improvement in locality of error messages. + +Try to diagnose use of array as scalar and vice versa -- to be improved in + the future. + +Fix for last bug fix for Cray division code--sigh. + +More changes to test suite to explicitly use sh. Also get rid of + a few generated files. + +Fixed off-by-one bug in string concatenation code. + +Fix for use of array that is passed in from a previous function parameter. + Addition to test suite for above. + +A number of changes associated with changing NF and access to fields + beyond the end of the current record. + +Change to missing/memcmp.c to avoid seg. fault on zero length input. + +Updates to test suite (including some inadvertently left out of the last patch) + to invoke sh explicitly (rather than rely on #!/bin/sh) and remove some + junk files. test/chem/good updated to correspond to bug fixes. + +Changes from 2.13.0 to 2.13.1 +----------------------------- + +More configs and PORTS. + +Fixed bug wherein a simple division produced an erroneous FPE, caused by + the Cray division workaround -- that code is now #ifdef'd only for + Cray *and* fixed. + +Fixed bug in modulus implementation -- it was very close to the above + code, so I noticed it. 
+ +Fixed portability problem with limits.h in missing.c + +Fixed portability problem with tzname and daylight -- define TZNAME_MISSING + if strftime() is missing and tzname is also. + +Better support for Latin-1 character set. + +Fixed portability problem in test Makefile. + +Updated PROBLEMS file. + +=============================== gawk-2.13 released ========================= +Changes from 2.12.42 to 2.12.43 +------------------------------- + +Typo in awk.y + +Fixed up strftime.3 and added doc. for %V. + +Changes from 2.12.41 to 2.12.42 +------------------------------- + +Fixed bug in devopen() -- if you had write permission in /dev, + it would just create /dev/stdout etc.!! + +Final (?) VMS update. + +Make NeXT use GFMT_WORKAROUND + +Fixed bug in sub_common() for substitute on zero-length match. Improved the + code a bit while I was at it. + +Fixed grammar so that $i++ parses as ($i)++ + +Put support/* back in the distribution (didn't I already do this?!) + +Changes from 2.12.40 to 2.12.41 +------------------------------- + +VMS workaround for broken %g format. + +Changes from 2.12.39 to 2.12.40 +------------------------------- + +Minor man page update. + +Fixed latent bug in redirect(). + +Changes from 2.12.38 to 2.12.39 +------------------------------- + +Updates to test suite -- remove dependence on changing gawk.1 man page. + +Changes from 2.12.37 to 2.12.38 +------------------------------- + +Fixed bug in use of *= without whitespace following. + +VMS update. + +Updates to man page. + +Option handling updates in main.c + +test/manyfiles redone and added to bigtest. + +Fixed latent (on Sun) bug in handling of save_fs. + +Changes from 2.12.36 to 2.12.37 +------------------------------- + +Update REL in Makefile-dist. Incorporate test suite into main distribution. + +Minor fix in regtest. 
+ +Changes from 2.12.35 to 2.12.36 +------------------------------- + +Release takes on dual personality -- 2.12.36 and 2.13.0 -- any further + patches before public release won't count for 2.13, although they will for + 2.12 -- be careful to avoid confusion! patchlevel.h will be the last thing + to change. + +Cray updates to deal with arithmetic problems. + +Minor test suite updates. + +Fixed latent bug in parser (freeing memory). + +Changes from 2.12.34 to 2.12.35 +------------------------------- + +VMS updates. + +Flush stdout at top of err() and stderr at bottom. + +Fixed bug in eval_condition() -- it wasn't testing for MAYBE_NUM and + doing the force_number(). + +Included the missing manyfiles.awk and a new test to catch the above bug which + I am amazed wasn't already caught by the test suite -- it's pretty basic. + +Changes from 2.12.33 to 2.12.34 +------------------------------- + +Atari updates -- including bug fix. + +More VMS updates -- also nuke vms/version.com. + +Fixed bug in handling of large numbers of redirections -- it was probably never + tested before (blush!). + +Minor rearrangement of code in r_force_number(). + +Made chem and regtest tests a bit more portable (Ultrix again). + +Added another test -- manyfiles -- not invoked under any other test -- very Unix + specific. + +Rough beginning of LIMITATIONS file -- need my AWK book to complete it. + +Changes from 2.12.32 to 2.12.33 +------------------------------- + +Expunge debug.? from various files. + +Remove vestiges of Floor and Ceil kludge. + +Special case integer division -- mainly for Cray, but maybe someone else + will benefit. + +Workaround for iop_close closing an output pipe descriptor on Cray -- + not conditional since I think it may fix a bug on SGI as well and I don't + think it can hurt elsewhere. + +Fixed memory leak in assoc_lookup(). + +Small cleanup in test suite. 
+ +Changes from 2.12.31 to 2.12.32 +------------------------------- + +Nuked debug.c and debugging flag -- there are better ways. + +Nuked version.sh and version.c in subdirectories. + +Fixed bug in handling of IGNORECASE. + +Fixed bug when FIELDWIDTHS was set via -v option. + +Fixed (obscure) bug when $0 is assigned a numerical value. + +Fixed so that escape sequences in command-line assignments work (as it already + said in the comment). + +Added a few cases to test suite. + +Moved support/* back into distribution. + +VMS updates. + +Changes from 2.12.30 to 2.12.31 +------------------------------- + +Cosmetic manual page changes. + +Updated sunos3 config. + +Small changes in test suite including renaming files over 14 chars. in length. + +Changes from 2.12.29 to 2.12.30 +------------------------------- + +Bug fix for many string concatenations in a row. + +Changes from 2.12.28 to 2.12.29 +------------------------------- + +Minor cleanup in awk.y + +Minor VMS update. + +Minor atari update. + +Changes from 2.12.27 to 2.12.28 +------------------------------- + +Got rid of the debugging goop in eval.c -- there are better ways. + +Sequent port. + +VMS changes left out of the last patch -- sigh! config/vms.h renamed + to config/vms-conf.h. + +Fixed missing/tzset.c + +Removed use of gcvt() and GCVT_MISSING -- turns out it was no faster than + sprintf("%g") and caused all sorts of portability headaches. + +Tuned get_field() -- it was unnecessarily parsing the whole record on reference + to $0. + +Tuned interpret() a bit in the rule_node loop. + +In r_force_number(), worked around bug in Uglix strtod() and got rid of + ugly do{}while(0) at Michal's urging. + +Replaced do_deref() and deref with unref(node) -- much cleaner and a bit faster. + +Got rid of assign_number() -- contrary to comment, it was no faster than + just making a new node and freeing the old one. + +Replaced make_number() and tmp_number() with macros that call mk_number(). 
+ +Changed freenode() and newnode() into macros -- the latter is getnode() + which calls more_nodes() as necessary. + +Changes from 2.12.26 to 2.12.27 +------------------------------- + +Completion of Cray 2 port (includes a kludge for floor() and ceil() + that may go or be changed -- I think that it may just be working around + a bug in chem that is being tweaked on the Cray). + +More VMS updates. + +Moved kludge over yacc's insertion of malloc and realloc declarations + from protos.h to the Makefile. + +Added a lisp interpreter in awk to the test suite. (Invoked under + bigtest.) + +Cleanup in r_force_number() -- I had never gotten around to a thorough + profile of the cache code and it turns out to be not worth it. + +Performance boost -- do lazy force_number()'ing for fields etc. i.e. + flag them (MAYBE_NUM) and call force_number only as necessary. + +Changes from 2.12.25 to 2.12.26 +------------------------------- + +Rework of regexp stuff so that dynamic regexps have reasonable + performance -- string used for compiled regexp is stored and + compared to new string -- if same, no recompilation is necessary. + Also, very dynamic regexps cause dfa-based searching to be turned + off. + +Code in dev_open() is back to returning fileno(std*) rather than + dup()ing it. This will be documented. Sorry for the run-around + on this. + +Minor atari updates. + +Minor vms update. + +Missing file from MSDOS port. + +Added warning (under lint) if third arg. of [g]sub is a constant and + handle it properly in the code (i.e. return how many matches). + +Changes from 2.12.24 to 2.12.25 +------------------------------- + +MSDOS port. + +Non-consequential changes to regexp variables in preparation for + a more serious change to fix a serious performance problem. + +Changes from 2.12.23 to 2.12.24 +------------------------------- + +Fixed bug in output flushing introduced a few patches back. This caused + serious performance losses. 
+ +Changes from 2.12.22 to 2.12.23 +------------------------------- + +Accidentally left config/cray2-60 out of last patch. + +Added some missing dependencies to Makefile. + +Cleaned up mkconf a bit; made yacc the default parser (no alloca needed, +right?); added rs6000 hook for signed characters. + +Made regex.c with NO_ALLOCA undefined work. + +Fixed bug in dfa.c for systems where free(NULL) bombs. + +Deleted a few cant_happen()'s that *really* can't happen. + +Changes from 2.12.21 to 2.12.22 +------------------------------- + +Added to config stuff the ability to choose YACC rather than bison. + +Fixed CHAR_UNSIGNED in config.h-dist. + +Second arg. of strtod() is char ** rather than const char **. + +stackb is now initially malloc()'ed since it may be realloc()'ed. + +VMS updates. + +Added SIZE_T_MISSING to config stuff and a default typedef to awk.h. + (Maybe it is not needed on any current systems??) + +re_compile_pattern()'s size is now size_t unconditionally. + +Changes from 2.12.20 to 2.12.21 +------------------------------- + +Corrected missing/gcvt.c. + +Got rid of use of dup2() and thus DUP_MISSING. + +Updated config/sgi33. + +Turned on (and fixed) in cmp_nodes() the behaviour that I *hope* will be in + POSIX 1003.2 for relational comparisons. + +Small updates to test suite. + +Changes from 2.12.19 to 2.12.20 +------------------------------- + +Sloppy, sloppy, sloppy!! I didn't even try to compile the last two + patches. This one fixes goofs in regex.c. + +Changes from 2.12.18 to 2.12.19 +------------------------------- + +Cleanup of last patch. + +Changes from 2.12.17 to 2.12.18 +------------------------------- + +Makefile renamed to Makefile-dist. + +Added alloca() configuration to mkconf. (A bit kludgey.) Just + add a single line containing ALLOCA_PW, ALLOCA_S or ALLOCA_C + to the appropriate config file to have Makefile-dist edited + accordingly. + +Reorganized output flushing to correspond with new semantics of + devopen() on "/dev/std*" etc. 
+ +Fixed rest of last goof!! + +Save and restore errno in do_pathopen(). + +Miscellaneous atari updates. + +Get rid of the trailing comma in the NODETYPE definition (Cray + compiler won't take it). + +Try to make the use of `const' consistent since Cray compiler is + fussy about that. See the changes to `basename' and `myname'. + +It turns out that, according to section 3.8.3 (Macro Replacement) + of the ANSI Standard: ``If there are sequences of preprocessing + tokens within the list of arguments that would otherwise act as + preprocessing directives, the behavior is undefined.'' That means + that you cannot count on the behavior of the declaration of + re_compile_pattern in awk.h, and indeed the Cray compiler chokes on it. + +Replaced alloca with malloc/realloc/free in regex.c. It was much simpler + than expected. (Inside NO_ALLOCA for now -- by default no alloca.) + +Added a configuration file, config/cray60, for Unicos-6.0. + +Changes from 2.12.16 to 2.12.17 +------------------------------- + +Ooops. Goofed signal use in last patch. + +Changes from 2.12.15 to 2.12.16 +------------------------------- + +RENAMED *_dir to just * (e.g. missing_dir). + +Numerous VMS changes. + +Proper inclusion of atari and vms files. + +Added experimental (ifdef'd out) RELAXED_CONTINUATION and DEFAULT_FILETYPE + -- please comment on these! + +Moved pathopen() to io.c (sigh). + +Put local directory ahead in default AWKPATH. + +Added facility in mkconf to echo comments on stdout: lines beginning + with "#echo " will have the remainder of the line echoed when mkconf is run. + Any lines starting with "#" will otherwise be treated as comments. The + intent is to be able to say: + "#echo Make sure you uncomment alloca.c in the Makefile" + or the like. + +Prototype fix for V.4 + +Fixed version_string to not print leading @(#). + +Fixed FIELDWIDTHS to work with strict (turned out to be easy). + +Fixed conf for V.2. + +Changed semantics of /dev/fd/n to be like on real /dev/fd. 
+ +Several configuration and updates in the makefile. + +Updated manpage. + +Include tzset.c and system.c from missing_dir that were accidentally left out of + the last patch. + +Fixed bug in cmdline variable assignment -- arg was getting freed(!) in + call to variable. + +Backed out of parse-time constant folding for now, until I can figure out + how to do it right. + +Fixed devopen() so that getline <"-" works. + +Changes from 2.12.14 to 2.12.15 +------------------------------- + +Changed config/* to a condensed form that can be used with mkconf to generate + a config.h from config.h-dist -- much easier to maintain. Please check + carefully against what you had before for a particular system and report + any problems. vms.h remains separate since the stuff at the bottom + didn't quite fit the mkconf model -- hopefully cleared up later. + +Fixed bug in grammar -- didn't allow function definition to be separated from + other rules by a semi-colon. + +VMS fix to #includes in missing.c -- should we just be including awk.h? + +Updated README for texinfo.tex version. + +Updating of copyright in all .[chy] files. + +Added but commented out Michal's fix to strftime. + +Added tzset() emulation based on Rick Adams' code. Added TZSET_MISSING to + config.h-dist. + +Added strftime.3 man page for missing_dir + +More posix: func, **, **= don't work in -W posix + +More lint: ^, ^= not in old awk + +gawk.1: removed ref to -DNO_DEV_FD, other minor updating. + +Style change: pushbak becomes pushback() in yylex(). + +Changes from 2.12.13 to 2.12.14 +------------------------------- + +Better (?) organization of awk.h -- attempt to keep all system dependencies + near the top and move some of the non-general things out of the config.h + files. + +Change to handling of SYSTEM_MISSING. + +Small change to ultrix config. + +Do "/dev/fd/*" etc. checking at runtime. + +First pass at VMS port. + +Improvements to error handling (when lexeme spans buffers). 
+ +Fixed backslash handling -- why didn't I notice this sooner? + +Added programs from book to test suite and new target "bigtest" to Makefile. + +Changes from 2.12.12 to 2.12.13 +------------------------------- + +Recognize OFS and ORS specially so that OFS = 9 works without efficiency hit. + Took advantage of opportunity to tune do_print*() for about 10% win on a + print with 5 args (i.e. small but significant). + +Somewhat pervasive changes to reconcile CONVFMT vs. OFMT. + +Better initialization of builtin vars. + +Make config/* consistent wrt STRTOL_MISSING. + +Small portability improvement to alloca.s + +Improvements to lint code in awk.y + +Replaced strtol() with a better one by Chris Torek. + +Changes from 2.12.11 to 2.12.12 +------------------------------- + +Added PORTS file to record successful ports. + +Added #define const to nothing if not STDC and added const to strtod() header. + +Added * to printf capabilities and partially implemented ' ' and '+' (has an + effect for %d only, silently ignored for other formats). I'm afraid that's + as far as I want to go before I look at a complete replacement for + do_sprintf(). + +Added warning for /regexp/ on LHS of MATCHOP. + +Changes from 2.12.10 to 2.12.11 +------------------------------- + +Small Makefile improvements. + +Some remaining nits from the NeXT port. + +Got rid of bcopy() define in awk.h -- not needed anymore (??) + +Changed private in builtin.c -- it is special on Sequent. + +Added subset implementation of strtol() and STRTOL_MISSING. + +A little bit of cleanup in debug.c, dfa.c. + +Changes from 2.12.9 to 2.12.10 +------------------------------ + +Redid compatibility checking and checking for # of args. + +Removed all references to variables[] from outside awk.y, in preparation + for a more abstract interface to the symbol table. + +Got rid of a remaining use of bcopy() in regex.c. 
+ +Changes from 2.12.8 to 2.12.9 +----------------------------- + +Portability improvements for atari, next and decstation. + +Bug fix in substr() -- wasn't handling 3rd arg. of -1 properly. + +Manpage updates. + +Moved support from src release to doc release. + +Updated FUTURES file. + +Added some "lint" warnings. + +Changes from 2.12.7 to 2.12.8 +----------------------------- + +Changed time() to systime(). + +Changed warning() in snode() to fatal(). + +strftime() now defaults second arg. to current time. + +Changes from 2.12.6 to 2.12.7 +----------------------------- + +Fixed bug in sub_common() involving inadequate allocation of a buffer. + +Added some missing files to the Makefile. + +Changes from 2.12.5 to 2.12.6 +----------------------------- + +Fixed bug wherein non-redirected getline could call iop_close() just + prior to a call from do_input(). + +Fixed bug in handling of /dev/stdout and /dev/stderr. + +Changes from 2.12.4 to 2.12.5 +----------------------------- + +Updated README and support directory. + +Changes from 2.12.3 to 2.12.4 +----------------------------- + +Updated CHANGES and TODO (should have been done in previous 2 patches). + +Changes from 2.12.2 to 2.12.3 +----------------------------- + +Brought regex.c and alloca.s into line with current FSF versions. + +Changes from 2.12.1 to 2.12.2 +----------------------------- + +Portability improvements; mostly moving system prototypes out of awk.h + +Introduction of strftime. + +Use of CONVFMT. + +Changes from 2.12 to 2.12.1 +----------------------------- + +Consolidated treatment of command-line assignments (thus correcting the +-v treatment). + +Rationalized builtin-variable handling into a table-driven process, thus +simplifying variable() and eliminating spc_var(). + +Fixed bug in handling of command-line source that ended in a newline. + +Simplified install() and lookup(). 
+ +Did away with double-mallocing of identifiers and now free second and later +instances of a name, after the first gets installed into the symbol table. + +Treat IGNORECASE specially, simplifying a lot of code, and allowing +checking against strict conformance only on setting it, rather than on each +pattern match. + +Fixed regexp matching when IGNORECASE is non-zero (broken when dfa.c was +added). + +Fixed bug where $0 was not being marked as valid, even after it was rebuilt. +This caused mangling of $0. + + +Changes from 2.11.1 to 2.12 +----------------------------- + +Makefile: + +Portability improvements in Makefile. +Move configuration stuff into config.h + +FSF files: + +Synchronized alloca.[cs] and regex.[ch] with FSF. + +array.c: + +Rationalized hash routines into one with a different algorithm. +delete() now works if the array is a local variable. +Changed interface of assoc_next() and avoided dereferencing past the end of the + array. + +awk.h: + +Merged non-prototype and prototype declarations in awk.h. +Expanded tree_eval #define to short-circuit more calls of r_tree_eval(). + +awk.y: + +Delinted some of the code in the grammar. +Fixed and improved some of the error message printing. +Changed to accommodate unlimited length source lines. +Line continuation now works as advertised. +Source lines can be arbitrarily long. +Refined grammar hacks so that /= assignment works. Regular expressions + starting with /= are recognized at the beginning of a line, after && or || + and after ~ or !~. More contexts can be added if necessary. +Fixed IGNORECASE (multiple scans for backslash). +Condensed expression_lists in array references. +Detect and warn for correct # args in builtin functions -- call most of them + with a fixed number (i.e. fill in defaults at parse-time rather than at + run-time). +Load ENVIRON only if it is referenced (detected at parse-time). +Treat NF, FS, RS, NR, FNR specially at parse time, to improve run time. 
+Fold constant expressions at parse time. +Do make_regexp() on third arg. of split() at parse tiem if it is a constant. + +builtin.c: + +srand() returns 0 the first time called. +Replaced alloca() with malloc() in do_sprintf(). +Fixed setting of RSTART and RLENGTH in do_match(). +Got rid of get_{one,two,three} and allowance for variable # of args. at + run-time -- this is now done at parse-time. +Fixed latent bug in [g]sub whereby changes to $0 would never get made. +Rewrote much of sub_common() for simplicity and performance. +Added ctime() and time() builtin functions (unless -DSTRICT). ctime() returns + a time string like the C function, given the number of seconds since the epoch + and time() returns the current time in seconds. +do_sprintf() now checks for mismatch between format string and number of + arguments supplied. + +dfa.c + +This is borrowed (almost unmodified) from GNU grep to provide faster searches. + +eval.c + +Node_var, Node_var_array and Node_param_list handled from macro rather + than in r_tree_eval(). +Changed cmp_nodes() to not do a force_number() -- this, combined with a + force_number() on ARGV[] and ENVIRON[] brings it into line with other awks +Greatly simplified cmp_nodes(). +Separated out Node_NF, Node_FS, Node_RS, Node_NR and Node_FNR in get_lhs(). +All adjacent string concatenations now done at once. + +field.c + +Added support for FIELDWIDTHS. +Fixed bug in get_field() whereby changes to a field were not always + properly reflected in $0. +Reordered tests in parse_field() so that reference off the end of the buffer + doesn't happen. +set_FS() now sets *parse_field i.e. routine to call depending on type of FS. +It also does make_regexp() for FS if needed. get_field() passes FS_regexp + to re_parse_field(), as does do_split(). +Changes to set_field() and set_record() to avoid malloc'ing and free'ing the + field nodes repeatedly. The fields now just point into $0 unless they are + assigned to another variable or changed. 
force_number() on the field is + *only* done when the field is needed. + +gawk.1 + +Fixed troff formatting problem on .TP lines. + +io.c + +Moved some code out into iop.c. +Output from pipes and system() calls is properly synchronized. +Status from pipe close properly returned. +Bug in getline with no redirect fixed. + +iop.c + +This file contains a totally revamped get_a_record and associated code. + +main.c + +Command line programs no longer use a temporary file. +Therefore, tmpnam() no longer required. +Deprecated -a and -e options -- they will go away in the next release, + but for now they cause a warning. +Moved -C, -V, -c options to -W ala posix. +Added -W posix option: throw out \x +Added -W lint option. + + +node.c + +force_number() now allows pure numerics to have leading whitespace. +Added make_string facility to optimize case of adding an already malloc'd + string. +Cleaned up and simplified do_deref(). +Fixed bug in handling of stref==255 in do_deref(). + +re.c + +contains the interface to regexp code + +Changes from 2.11.1 to FSF version of same +------------------------------------------ +Thu Jan 4 14:19:30 1990 Jim Kingdon (kingdon at albert) + + * Makefile (YACC): Add -y to bison part. + + * missing.c: Add #include . + +Sun Dec 24 16:16:05 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu) + + * Makefile: Add (commented out) default defines for Sony News. + + * awk.h: Move declaration of vprintf so it will compile when + -DVPRINTF_MISSING is defined. + +Mon Nov 13 18:54:08 1989 Robert J. Chassell (bob at apple-gunkies.ai.mit.edu) + + * gawk.texinfo: changed @-commands that are not part of the + standard, currently released texinfmt.el to those that are. + Otherwise, only people with the as-yet unreleased makeinfo.c can + format this file. + +Changes from 2.11beta to 2.11.1 (production) +-------------------------------------------- + +Went from "beta" to production status!!! 
+ +Now flushes stdout before closing pipes or redirected files to +synchronize output. + +MS-DOS changes added in. + +Signal handler return type parameterized in Makefile and awk.h and +some lint removed. debug.c cleaned up. + +Fixed FS splitting to never match null strings, per book. + +Correction to the manual's description of FS. + +Some compilers break on char *foo = "string" + 4 so fixed version.sh and +main.c. + +Changes from 2.10beta to 2.11beta +--------------------------------- + +This release fixes all reported bugs that we could reproduce. Probably +some of the changes are not documented here. + +The next release will probably not be a beta release! + +The most important change is the addition of the -nostalgia option. :-) + +The documentation has been improved and brought up-to-date. + +There has been a lot of general cleaning up of the code that is not otherwise +documented here. There has been a movement toward using standard-conforming +library routines and providing them (in missing.d) for systems lacking them. +Improved (hopefully) configuration through Makfile modifications and missing.c. +In particular, straightened out confusion over vprintf #defines, declarations +etc. + +Deleted RCS log comments from source, to reduce source size by about one third. +Most of them were horribly out-of-date, anyway. + +Renamed source files to reflect (for the most part) their contents. + +More and improved error messages. Cleanup and fixes to yyerror(). +String constants are not altered in input buffer, so error messages come out +better. Fixed usage message. Make use of ANSI C strerror() function +(provided). + +Plugged many more memory leaks. The memory consumption is now quite +reasonable over a wide range of programs. + +Uses volatile declaration if STDC > 0 to avoid problems due to longjmp. + +New -a and -e options to use awk or egrep style regexps, respectively, +since POSIX says awk should use egrep regexps. Default is -a. 
+ +Added -v option for setting variables before the first file is encountered. +Version information now uses -V and copyleft uses -C. + +Added a patchlevel.h file and its use for -V and -C. + +Append_right() optimized for major improvement to programs with a *lot* +of statements. + +Operator precedence has been corrected to match draft Posix. + +Tightened up grammar for builtin functions so that only length +may be called without arguments or parentheses. + +/regex/ is now a normal expression that can appear in any expression +context. + +Allow /= to begin a regexp. Allow ..[../..].. in a regexp. + +Allow empty compound statements ({}). + +Made return and next illegal outside a function and in BEGIN/END respectively. + +Division by zero is now illegal and causes a fatal error. + +Fixed exponentiation so that x ^ 0 and x ^= 0 both return 1. + +Fixed do_sqrt, do_log, and do_exp to do argument/return checking and +print an error message, per the manual. + +Fixed main to catch SIGSEGV to get source and data file line numbers. + +Fixed yyerror to print the ^ at the beginning of the bad token, not the end. + +Fix to substr() builtin: it was failing if the arguments +weren't already strings. + +Added new node value flag NUMERIC to indicate that a variable is +purely a number as opposed to type NUM which indicates that +the node's numeric value is valid. This is set in make_number(), +tmp_number and r_force_number() when appropriate and used in +cmp_nodes(). This fixed a bug in comparison of variables that had +numeric prefixes. The new code uses strtod() and eliminates is_a_number(). +A simple strtod() is provided for systems lacking one. It does no +overflow checking, so could be improved. + +Simplification and efficiency improvement in force_string. + +Added performance tweak in r_force_number(). + +Fixed a bug with nested loops and break/continue in functions. + +Fixed inconsistency in handling of empty fields when $0 has to be rebuilt. 
+Happens to simplify rebuild_record(). + +Cleaned up the code associated with opening a pipe for reading. Gawk +now has its own popen routine (gawk_popen) that allocates an IOBUF +and keeps track of the pid of the child process. gawk_pclose +marks the appropriate child as defunct in the right struct redirect. + +Cleaned up and fixed close_redir(). + +Fixed an obscure bug to do with redirection. Intermingled ">" and ">>" +redirects did not output in a predictable order. + +Improved handling of output buffering: now all print[f]s redirected to a tty +or pipe are flushed immediately and non-redirected output to a tty is flushed +before the next input record is read. + +Fixed a bug in get_a_record() where bcopy() could have copied over +a random pointer. + +Fixed a bug when RS="" and records separated by multiple blank lines. + +Got rid of SLOWIO code which was out-of-date anyway. + +Fix in get_field() for case where $0 is changed and then $(n) are +changed and then $0 is used. + +Fixed infinite loop on failure to open file for reading from getline. +Now handles redirect file open failures properly. + +Filenames such as /dev/stdin now allowed on the command line as well as +in redirects. + +Fixed so that gawk '$1' where $1 is a zero tests false. + +Fixed parsing so that `RLENGTH -1' parses the same as `RLENGTH - 1', +for example. + +The return from a user-defined function now defaults to the Null node. +This fixes a core-dump-causing bug when the return value of a function +is used and that function returns no value. + +Now catches floating point exceptions to avoid core dumps. + +Bug fix for deleting elements of an array -- under some conditions, it was +deleting more than one element at a time. + +Fix in AWKPATH code for running off the end of the string. + +Fixed handling of precision in *printf calls. %0.2d now works properly, +as does %c. [s]printf now recognizes %i and %X. + +Fixed a bug in printing of very large (>240) strings. 
+ +Cleaned up erroneous behaviour for RS == "". + +Added IGNORECASE support to index(). + +Simplified and fixed newnode/freenode. + +Fixed reference to $(anything) in a BEGIN block. + +Eliminated use of USG rand48(). + +Bug fix in force_string for machines with 16-bit ints. + +Replaced use of mktemp() with tmpnam() and provided a partial implementation of +the latter for systems that don't have it. + +Added a portability check for includes in io.c. + +Minor portability fix in alloc.c plus addition of xmalloc(). + +Portability fix: on UMAX4.2, st_blksize is zero for a pipe, thus breaking +iop_alloc() -- fixed. + +Workaround for compiler bug on Sun386i in do_sprintf. + +More and improved prototypes in awk.h. + +Consolidated C escape parsing code into one place. + +strict flag is now turned on only when invoked with compatability option. +It now applies to fewer things. + +Changed cast of f._ptr in vprintf.c from (unsigned char *) to (char *). +Hopefully this is right for the systems that use this code (I don't). + +Support for pipes under MSDOS added. 
diff --git a/contrib/awk/PORTS b/contrib/awk/PORTS new file mode 100644 index 0000000..c6cbb83 --- /dev/null +++ b/contrib/awk/PORTS @@ -0,0 +1,36 @@ +A recent version of gawk has been successfully compiled and run "make test" +on the following: + +Using cc: + Dec Alpha OSF 4.0 + HP9000/755 HP-UX 9.01 + IBM PowerPC AIX 4.1.4.0 + SCO Unix (OpenServer 5) + SGI IRIX 4.0.5 + SGI IRIX 5.3 + SGI IRIX 6.1 + SGI IRIX 6.2 + SunOS 4.1.3 + SunOS 5.5 + IBM SP2 AIX 4.1 + +Other systems: + DEC Alpha Linux/AXP + DEC Alpha OSF/1 3.2 + DECstation 5000 ULTRIX 4.3 + HP 9000/735 HP-UX 10.01 + IBM RS/6000 AIX 3.2 + IBM SP2 AIX 4.1 + Intel x86 DOS (compiler: djgpp v2, emx+gcc, + and MSC 6.00A, 7, and 8) + Intel x86 Linux 2.0.27 + Intel x86 Linux 2.1.36 + Intel x86 OS+2 (compiler: emx+gcc) + NeXT Turbostation Mach 3.3 + SGI Indigo/2 IRIX 5.3 + SGI O2 IRIX 6.2 + SGI PowerChallenge IRIX 6.1 + Sun SPARC Linux 2.0.22 + Sun SPARC Solaris 2.5 + Sun SPARC Solaris 2.5.1 + Sun SPARC SunOS 4.1.3 diff --git a/contrib/awk/POSIX.STD b/contrib/awk/POSIX.STD new file mode 100644 index 0000000..ac8e1ab --- /dev/null +++ b/contrib/awk/POSIX.STD @@ -0,0 +1,109 @@ +August 1995: + +Although the published 1003.2 standard contained the incorrect +comparison rules of 11.2 draft as described below, no actual implementation +of awk (that I know of) actually used those rules. + +A revision of the 1003.2 standard is in progress, and in the May 1995 +draft, the rules were fixed (based on my submissions for interpretation +requests) to match the description given below. Thus, the next version +of the standard will have a correct description of the comparison +rules. + +June 1992: + +Right now, the numeric vs. string comparisons are screwed up in draft +11.2. What prompted me to check it out was the note in gnu.bug.utils +which observed that gawk was doing the comparison $1 == "000" +numerically. I think that we can agree that intuitively, this should +be done as a string comparison. 
Version 2.13.2 of gawk follows the +current POSIX draft. Following is how I (now) think this +stuff should be done. + +1. A numeric literal or the result of a numeric operation has the NUMERIC + attribute. + +2. A string literal or the result of a string operation has the STRING + attribute. + +3. Fields, getline input, FILENAME, ARGV elements, ENVIRON elements and the + elements of an array created by split() that are numeric strings + have the STRNUM attribute. Otherwise, they have the STRING attribute. + Uninitialized variables also have the STRNUM attribute. + +4. Attributes propagate across assignments, but are not changed by + any use. (Although a use may cause the entity to acquire an additional + value such that it has both a numeric and string value -- this leaves the + attribute unchanged.) + +When two operands are compared, either string comparison or numeric comparison +may be used, depending on the attributes of the operands, according to the +following (symmetric) matrix: + + +---------------------------------------------- + | STRING NUMERIC STRNUM +--------+---------------------------------------------- + | +STRING | string string string + | +NUMERIC | string numeric numeric + | +STRNUM | string numeric numeric +--------+---------------------------------------------- + +So, the following program should print all OKs. + +echo '0e2 0a 0 0b +0e2 0a 0 0b' | +$AWK ' +NR == 1 { + num = 0 + str = "0e2" + + print ++test ": " ( (str == "0e2") ? "OK" : "OOPS" ) + print ++test ": " ( ("0e2" != 0) ? "OK" : "OOPS" ) + print ++test ": " ( ("0" != $2) ? "OK" : "OOPS" ) + print ++test ": " ( ("0e2" == $1) ? "OK" : "OOPS" ) + + print ++test ": " ( (0 == "0") ? "OK" : "OOPS" ) + print ++test ": " ( (0 == num) ? "OK" : "OOPS" ) + print ++test ": " ( (0 != $2) ? "OK" : "OOPS" ) + print ++test ": " ( (0 == $1) ? "OK" : "OOPS" ) + + print ++test ": " ( ($1 != "0") ? "OK" : "OOPS" ) + print ++test ": " ( ($1 == num) ? "OK" : "OOPS" ) + print ++test ": " ( ($2 != 0) ? 
"OK" : "OOPS" ) + print ++test ": " ( ($2 != $1) ? "OK" : "OOPS" ) + print ++test ": " ( ($3 == 0) ? "OK" : "OOPS" ) + print ++test ": " ( ($3 == $1) ? "OK" : "OOPS" ) + print ++test ": " ( ($2 != $4) ? "OK" : "OOPS" ) # 15 +} +{ + a = "+2" + b = 2 + if (NR % 2) + c = a + b + print ++test ": " ( (a != b) ? "OK" : "OOPS" ) # 16 and 22 + + d = "2a" + b = 2 + if (NR % 2) + c = d + b + print ++test ": " ( (d != b) ? "OK" : "OOPS" ) + + print ++test ": " ( (d + 0 == b) ? "OK" : "OOPS" ) + + e = "2" + print ++test ": " ( (e == b "") ? "OK" : "OOPS" ) + + a = "2.13" + print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" ) + + a = "2.130000" + print ++test ": " ( (a != 2.13) ? "OK" : "OOPS" ) + + if (NR == 2) { + CONVFMT = "%.6f" + print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" ) + } +}' diff --git a/contrib/awk/PROBLEMS b/contrib/awk/PROBLEMS new file mode 100644 index 0000000..8f7d954 --- /dev/null +++ b/contrib/awk/PROBLEMS @@ -0,0 +1,10 @@ +This is a list of known problems in gawk 3.0. +Hopefully they will all be fixed in the next major release of gawk. + +Please keep in mind that the code is still undergoing significant evolution. + +1. The interactions with the lexer and yyerror need reworking. It is possible + to get line numbers that are one line off if --compat or --posix is + true and either `nextfile' or `delete array' are used. + + Really the whole lexical analysis stuff needs reworking. diff --git a/contrib/awk/README b/contrib/awk/README new file mode 100644 index 0000000..890b16d --- /dev/null +++ b/contrib/awk/README @@ -0,0 +1,94 @@ +README: + +This is GNU Awk 3.0.3. It should be upwardly compatible with the Bell +Labs research version of awk. It is almost completely compliant with +the 1993 POSIX 1003.2 standard for awk. (See the note below about POSIX.) + +Patches 1 through 3 just fix bugs -- see NEWS and ChangeLog for details. + +See the file INSTALL for installation instructions. + +Known problems are given in the PROBLEMS file. 
Work to be done is +described briefly in the FUTURES file. Verified ports are listed in +the PORTS file. Changes in this version are summarized in the NEWS file. +Please read the LIMITATIONS and ACKNOWLEDGMENT files. + +Read the file POSIX for a discussion of how the standard says comparisons +should be done vs. how they really should be done and how gawk does them. + +To format the documentation with TeX, use at least version 2.151 of +texinfo.tex. There is a usable copy of texinfo.tex in the doc directory. + +The Info file now comes with the distribution. + +The man page is up to date. + +INSTALLATION: + +Check whether there is a system-specific README file for your system +under the `README_d' directory. If there's something there that you +should have read and didn't, and you bug me about it, I'm going to yell +at you. + +See the file INSTALL. + +If you have neither bison nor yacc, use the awktab.c file here. It was +generated with bison, and has no proprietary code in it. (Note that modifying +awk.y without bison or yacc will be difficult, at best. You might want +to get a copy of bison from the FSF too.) + +If you have an MS-DOS or OS/2 system, use the stuff in the `pc' directory. +Similarly, there are separate directories for Ataris, Amigas, and VMS. + +Appendix B of The GNU Awk User's Guide discusses configuration in detail. The +configuration process is now based on Autoconf, so the whole business +should be considerably simpler than it was in gawk 2.X.Y. + +After successful compilation, do 'make check' to run a small test +suite. There should be no output from the 'cmp' invocations except in +the cases where there are small differences in floating point values, +and possibly in the case of strftime. Several of the tests ignore +errors on purpose; those are not a problem. If there are other +differences, please investigate and report the problem. 
+ +PRINTING THE MANUAL + +The 'doc' directory contains a recent version of texinfo.tex, which will be +necessary for printing the manual. Use `make dvi' to get a DVI file from +the manual. In the `doc' directory, use `make postscript' to get a PostScript +version of the manual. + +BUG REPORTS AND FIXES (Un*x systems): + +Please coordinate changes through Arnold Robbins. In particular, see +the section in the manual on reporting bugs. Note that comp.lang.awk is +about the worst place to post a gawk bug report. Please, use the mechanisms +outlined in the manual. + +Arnold Robbins +INTERNET: arnold@gnu.ai.mit.edu + +BUG REPORTS AND FIXES (non-Unix ports): + +MS-DOS: + Scott Deifik + scottd@amgen.com + + Darrel Hankerson + hankedr@mail.auburn.edu + +VMS: + Pat Rankin + rankin@eql.caltech.edu + +Atari ST: + Michal Jaegermann + michal@gortel.phys.ualberta.ca + +OS/2: + Kai Uwe Rommel + rommel@ars.de + +Amiga: + Fred Fish + fnf@ninemoons.com diff --git a/contrib/awk/README_d/README.FIRST b/contrib/awk/README_d/README.FIRST new file mode 100644 index 0000000..2ebd5b7 --- /dev/null +++ b/contrib/awk/README_d/README.FIRST @@ -0,0 +1,21 @@ +Sat Feb 18 23:07:55 EST 1995 + +Starting with 2.15.6, gawk will preserve the value of NF and $0 for +the last record read into the END rule(s). This is important to you +if your program uses + + print + +in an END rule to mean + + print "" + +(i.e., print nothing). Examine your awk programs carefully to make sure +that they use `print ""' instead of `print', otherwise you will get +strange results. + +If you send me email about this, without having read this +file, I will yell at you. + +Arnold Robbins +arnold@gnu.ai.mit.edu diff --git a/contrib/awk/acconfig.h b/contrib/awk/acconfig.h new file mode 100644 index 0000000..05f3c61 --- /dev/null +++ b/contrib/awk/acconfig.h @@ -0,0 +1,36 @@ +/* + * acconfig.h -- configuration definitions for gawk. + */ + +/* + * Copyright (C) 1995-1997 the Free Software Foundation, Inc. 
+ * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +@TOP@ + +#undef HAVE_STRINGIZE /* can use ANSI # operator in cpp */ +#undef REGEX_MALLOC /* use malloc instead of alloca in regex.c */ +#undef SPRINTF_RET /* return type of sprintf */ +#undef BITOPS /* bitwise ops (undocumented feature) */ +#undef NONDECDATA /* non-decimal input data (undocumented feature) */ + +@BOTTOM@ + +#include /* overrides for stuff autoconf can't deal with */ diff --git a/contrib/awk/array.c b/contrib/awk/array.c new file mode 100644 index 0000000..b178cd2 --- /dev/null +++ b/contrib/awk/array.c @@ -0,0 +1,526 @@ +/* + * array.c - routines for associative arrays. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991 - 97 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/* + * Tree walks (``for (iggy in foo)'') and array deletions use expensive + * linear searching. So what we do is start out with small arrays and + * grow them as needed, so that our arrays are hopefully small enough, + * most of the time, that they're pretty full and we're not looking at + * wasted space. + * + * The decision is made to grow the array if the average chain length is + * ``too big''. This is defined as the total number of entries in the table + * divided by the size of the array being greater than some constant. 
+ */ + +#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */ + +#include "awk.h" + +static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1)); +static void grow_table P((NODE *symbol)); + +/* concat_exp --- concatenate expression list into a single string */ + +NODE * +concat_exp(tree) +register NODE *tree; +{ + register NODE *r; + char *str; + char *s; + size_t len; + int offset; + size_t subseplen; + char *subsep; + + if (tree->type != Node_expression_list) + return force_string(tree_eval(tree)); + r = force_string(tree_eval(tree->lnode)); + if (tree->rnode == NULL) + return r; + subseplen = SUBSEP_node->lnode->stlen; + subsep = SUBSEP_node->lnode->stptr; + len = r->stlen + subseplen + 2; + emalloc(str, char *, len, "concat_exp"); + memcpy(str, r->stptr, r->stlen+1); + s = str + r->stlen; + free_temp(r); + for (tree = tree->rnode; tree != NULL; tree = tree->rnode) { + if (subseplen == 1) + *s++ = *subsep; + else { + memcpy(s, subsep, subseplen+1); + s += subseplen; + } + r = force_string(tree_eval(tree->lnode)); + len += r->stlen + subseplen; + offset = s - str; + erealloc(str, char *, len, "concat_exp"); + s = str + offset; + memcpy(s, r->stptr, r->stlen+1); + s += r->stlen; + free_temp(r); + } + r = make_str_node(str, s - str, ALREADY_MALLOCED); + r->flags |= TEMP; + return r; +} + +/* assoc_clear --- flush all the values in symbol[] before doing a split() */ + +void +assoc_clear(symbol) +NODE *symbol; +{ + int i; + NODE *bucket, *next; + + if (symbol->var_array == NULL) + return; + for (i = 0; i < symbol->array_size; i++) { + for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) { + next = bucket->ahnext; + unref(bucket->ahname); + unref(bucket->ahvalue); + freenode(bucket); + } + symbol->var_array[i] = NULL; + } + free(symbol->var_array); + symbol->var_array = NULL; + symbol->array_size = symbol->table_size = 0; + symbol->flags &= ~ARRAYMAXED; +} + +/* hash --- calculate the hash function of the string in subs */ + 
+unsigned int +hash(s, len, hsize) +register const char *s; +register size_t len; +unsigned long hsize; +{ + register unsigned long h = 0; + + /* + * This is INCREDIBLY ugly, but fast. We break the string up into + * 8 byte units. On the first time through the loop we get the + * "leftover bytes" (strlen % 8). On every other iteration, we + * perform 8 HASHC's so we handle all 8 bytes. Essentially, this + * saves us 7 cmp & branch instructions. If this routine is + * heavily used enough, it's worth the ugly coding. + * + * OZ's original sdbm hash, copied from Margo Seltzers db package. + */ + + /* + * Even more speed: + * #define HASHC h = *s++ + 65599 * h + * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts + */ +#define HASHC htmp = (h << 6); \ + h = *s++ + htmp + (htmp << 10) - h + + unsigned long htmp; + + h = 0; + +#if defined(VAXC) + /* + * This was an implementation of "Duff's Device", but it has been + * redone, separating the switch for extra iterations from the + * loop. This is necessary because the DEC VAX-C compiler is + * STOOPID. + */ + switch (len & (8 - 1)) { + case 7: HASHC; + case 6: HASHC; + case 5: HASHC; + case 4: HASHC; + case 3: HASHC; + case 2: HASHC; + case 1: HASHC; + default: break; + } + + if (len > (8 - 1)) { + register size_t loop = len >> 3; + do { + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + } while (--loop); + } +#else /* ! VAXC */ + /* "Duff's Device" for those who can handle it */ + if (len > 0) { + register size_t loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { /* All fall throughs */ + HASHC; + case 7: HASHC; + case 6: HASHC; + case 5: HASHC; + case 4: HASHC; + case 3: HASHC; + case 2: HASHC; + case 1: HASHC; + } while (--loop); + } + } +#endif /* ! 
VAXC */ + + if (h >= hsize) + h %= hsize; + return h; +} + +/* assoc_find --- locate symbol[subs] */ + +static NODE * /* NULL if not found */ +assoc_find(symbol, subs, hash1) +NODE *symbol; +register NODE *subs; +int hash1; +{ + register NODE *bucket; + + for (bucket = symbol->var_array[hash1]; bucket != NULL; + bucket = bucket->ahnext) { + if (cmp_nodes(bucket->ahname, subs) == 0) + return bucket; + } + return NULL; +} + +/* in_array --- test whether the array element symbol[subs] exists or not */ + +int +in_array(symbol, subs) +NODE *symbol, *subs; +{ + register int hash1; + int ret; + + if (symbol->type == Node_param_list) + symbol = stack_ptr[symbol->param_cnt]; + if ((symbol->flags & SCALAR) != 0) + fatal("attempt to use scalar as array"); + /* + * evaluate subscript first, it could have side effects + */ + subs = concat_exp(subs); /* concat_exp returns a string node */ + if (symbol->var_array == NULL) { + free_temp(subs); + return 0; + } + hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); + ret = (assoc_find(symbol, subs, hash1) != NULL); + free_temp(subs); + return ret; +} + +/* + * assoc_lookup: + * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it + * isn't there. Returns a pointer ala get_lhs to where its value is stored. + * + * SYMBOL is the address of the node (or other pointer) being dereferenced. + * SUBS is a number or string used as the subscript. 
+ */ + +NODE ** +assoc_lookup(symbol, subs) +NODE *symbol, *subs; +{ + register int hash1; + register NODE *bucket; + + (void) force_string(subs); + + if ((symbol->flags & SCALAR) != 0) + fatal("attempt to use scalar as array"); + + if (symbol->var_array == NULL) { + symbol->type = Node_var_array; + symbol->array_size = symbol->table_size = 0; /* sanity */ + symbol->flags &= ~ARRAYMAXED; + grow_table(symbol); + hash1 = hash(subs->stptr, subs->stlen, + (unsigned long) symbol->array_size); + } else { + hash1 = hash(subs->stptr, subs->stlen, + (unsigned long) symbol->array_size); + bucket = assoc_find(symbol, subs, hash1); + if (bucket != NULL) { + free_temp(subs); + return &(bucket->ahvalue); + } + } + + /* It's not there, install it. */ + if (do_lint && subs->stlen == 0) + warning("subscript of array `%s' is null string", + symbol->vname); + + /* first see if we would need to grow the array, before installing */ + symbol->table_size++; + if ((symbol->flags & ARRAYMAXED) == 0 + && symbol->table_size/symbol->array_size > AVG_CHAIN_MAX) { + grow_table(symbol); + /* have to recompute hash value for new size */ + hash1 = hash(subs->stptr, subs->stlen, + (unsigned long) symbol->array_size); + } + + getnode(bucket); + bucket->type = Node_ahash; + if (subs->flags & TEMP) + bucket->ahname = dupnode(subs); + else { + unsigned int saveflags = subs->flags; + + subs->flags &= ~MALLOC; + bucket->ahname = dupnode(subs); + subs->flags = saveflags; + } + free_temp(subs); + + /* array subscripts are strings */ + bucket->ahname->flags &= ~NUMBER; + bucket->ahname->flags |= STRING; + bucket->ahvalue = Nnull_string; + bucket->ahnext = symbol->var_array[hash1]; + symbol->var_array[hash1] = bucket; + return &(bucket->ahvalue); +} + +/* do_delete --- perform `delete array[s]' */ + +void +do_delete(symbol, tree) +NODE *symbol, *tree; +{ + register int hash1; + register NODE *bucket, *last; + NODE *subs; + + if (symbol->type == Node_param_list) { + symbol = stack_ptr[symbol->param_cnt]; + if 
(symbol->type == Node_var) + return; + } + if (symbol->type == Node_var_array) { + if (symbol->var_array == NULL) + return; + } else + fatal("delete: illegal use of variable `%s' as array", + symbol->vname); + + if (tree == NULL) { /* delete array */ + assoc_clear(symbol); + return; + } + + subs = concat_exp(tree); /* concat_exp returns string node */ + hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); + + last = NULL; + for (bucket = symbol->var_array[hash1]; bucket != NULL; + last = bucket, bucket = bucket->ahnext) + if (cmp_nodes(bucket->ahname, subs) == 0) + break; + free_temp(subs); + if (bucket == NULL) { + if (do_lint) + warning("delete: index `%s' not in array `%s'", + subs->stptr, symbol->vname); + return; + } + if (last != NULL) + last->ahnext = bucket->ahnext; + else + symbol->var_array[hash1] = bucket->ahnext; + unref(bucket->ahname); + unref(bucket->ahvalue); + freenode(bucket); + symbol->table_size--; + if (symbol->table_size <= 0) { + memset(symbol->var_array, '\0', + sizeof(NODE *) * symbol->array_size); + symbol->table_size = symbol->array_size = 0; + symbol->flags &= ~ARRAYMAXED; + free((char *) symbol->var_array); + symbol->var_array = NULL; + } +} + +/* assoc_scan --- start a ``for (iggy in foo)'' loop */ + +void +assoc_scan(symbol, lookat) +NODE *symbol; +struct search *lookat; +{ + lookat->sym = symbol; + lookat->idx = 0; + lookat->bucket = NULL; + lookat->retval = NULL; + if (symbol->var_array != NULL) + assoc_next(lookat); +} + +/* assoc_next --- actually find the next element in array */ + +void +assoc_next(lookat) +struct search *lookat; +{ + register NODE *symbol = lookat->sym; + + if (symbol == NULL) + fatal("null symbol in assoc_next"); + if (symbol->var_array == NULL || lookat->idx > symbol->array_size) { + lookat->retval = NULL; + return; + } + /* + * This is theoretically unsafe. The element bucket might have + * been freed if the body of the scan did a delete on the next + * element of the bucket. 
The only way to do that is by array + * reference, which is unlikely. Basically, if the user is doing + * anything other than an operation on the current element of an + * assoc array while walking through it sequentially, all bets are + * off. (The safe way is to register all search structs on an + * array with the array, and update all of them on a delete or + * insert) + */ + if (lookat->bucket != NULL) { + lookat->retval = lookat->bucket->ahname; + lookat->bucket = lookat->bucket->ahnext; + return; + } + for (; lookat->idx < symbol->array_size; lookat->idx++) { + NODE *bucket; + + if ((bucket = symbol->var_array[lookat->idx]) != NULL) { + lookat->retval = bucket->ahname; + lookat->bucket = bucket->ahnext; + lookat->idx++; + return; + } + } + lookat->retval = NULL; + lookat->bucket = NULL; + return; +} + +/* grow_table --- grow a hash table */ + +static void +grow_table(symbol) +NODE *symbol; +{ + NODE **old, **new, *chain, *next; + int i, j; + unsigned long hash1; + unsigned long oldsize, newsize; + /* + * This is an array of primes. We grow the table by an order of + * magnitude each time (not just doubling) so that growing is a + * rare operation. We expect, on average, that it won't happen + * more than twice. The final size is also chosen to be small + * enough so that MS-DOG mallocs can handle it. When things are + * very large (> 8K), we just double more or less, instead of + * just jumping from 8K to 64K. + */ + static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497, +#if ! defined(MSDOS) && ! defined(OS2) && ! defined(atarist) + 131101, 262147, 524309, 1048583, 2097169, + 4194319, 8388617, 16777259, 33554467, + 67108879, 134217757, 268435459, 536870923, + 1073741827 +#endif + }; + + /* find next biggest hash size */ + newsize = oldsize = symbol->array_size; + for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) { + if (oldsize < sizes[i]) { + newsize = sizes[i]; + break; + } + } + + if (newsize == oldsize) { /* table already at max (!) 
*/ + symbol->flags |= ARRAYMAXED; + return; + } + + /* allocate new table */ + emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table"); + memset(new, '\0', newsize * sizeof(NODE *)); + + /* brand new hash table, set things up and return */ + if (symbol->var_array == NULL) { + symbol->table_size = 0; + goto done; + } + + /* old hash table there, move stuff to new, free old */ + old = symbol->var_array; + for (i = 0; i < oldsize; i++) { + if (old[i] == NULL) + continue; + + for (chain = old[i]; chain != NULL; chain = next) { + next = chain->ahnext; + hash1 = hash(chain->ahname->stptr, + chain->ahname->stlen, newsize); + + /* remove from old list, add to new */ + chain->ahnext = new[hash1]; + new[hash1] = chain; + + } + } + free(old); + +done: + /* + * note that symbol->table_size does not change if an old array, + * and is explicitly set to 0 if a new one. + */ + symbol->var_array = new; + symbol->array_size = newsize; +} diff --git a/contrib/awk/awk.h b/contrib/awk/awk.h new file mode 100644 index 0000000..630144d --- /dev/null +++ b/contrib/awk/awk.h @@ -0,0 +1,882 @@ +/* + * awk.h -- Definitions for gawk. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/* ------------------------------ Includes ------------------------------ */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 /* enable GNU extensions */ +#endif /* _GNU_SOURCE */ + +#include +#ifdef HAVE_LIMITS_H +#include +#endif /* HAVE_LIMITS_H */ +#include +#include +#ifdef HAVE_LOCALE_H +#include +#endif /* HAVE_LOCALE_H */ +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +#include +#else +#include +#endif +#include +#include +#include +#if ! defined(errno) && ! defined(MSDOS) && ! defined(OS2) +extern int errno; +#endif +#ifdef HAVE_SIGNUM_H +#include +#endif + +/* ----------------- System dependencies (with more includes) -----------*/ + +/* This section is the messiest one in the file, not a lot that can be done */ + +/* First, get the ctype stuff right; from Jim Meyering */ +#if defined(STDC_HEADERS) || (!defined(isascii) && !defined(HAVE_ISASCII)) +#define ISASCII(c) 1 +#else +#define ISASCII(c) isascii(c) +#endif + +#ifdef isblank +#define ISBLANK(c) (ISASCII(c) && isblank(c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (ISASCII(c) && isgraph(c)) +#else +#define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c)) +#endif + +#define ISPRINT(c) (ISASCII (c) && isprint (c)) +#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +#define ISALNUM(c) (ISASCII (c) && isalnum (c)) +#define ISALPHA(c) (ISASCII (c) && isalpha (c)) +#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +#define ISLOWER(c) (ISASCII (c) && islower (c)) +#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +#define ISSPACE(c) (ISASCII (c) && isspace (c)) +#define ISUPPER(c) (ISASCII (c) && isupper (c)) +#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + + +#ifdef __STDC__ 
+#define P(s) s +#define MALLOC_ARG_T size_t +#else /* not __STDC__ */ +#define P(s) () +#define MALLOC_ARG_T unsigned +#define volatile +#define const +#endif /* not __STDC__ */ + +#if ! defined(VMS) || (! defined(VAXC) && ! defined(__DECC)) +#include +#include +#else /* VMS w/ VAXC or DECC */ +#include +#include +#include /* avoid in io.c */ +#ifdef __DECC +/* DEC C implies DECC$SHR, which doesn't have the %g problem of VAXCRTL */ +#undef GFMT_WORKAROUND +#endif +#endif /* VMS w/ VAXC or DECC */ + +#ifdef STDC_HEADERS +#include +#else /* not STDC_HEADERS */ +#include "protos.h" +#endif /* not STDC_HEADERS */ + +#ifdef HAVE_STRING_H +#include +#ifdef NEED_MEMORY_H +#include +#endif /* NEED_MEMORY_H */ +#else /* not HAVE_STRING_H */ +#ifdef HAVE_STRINGS_H +#include +#endif /* HAVE_STRINGS_H */ +#endif /* not HAVE_STRING_H */ + +#ifdef NeXT +#if __GNUC__ < 2 || __GNUC_MINOR__ < 7 +#include +#endif +#undef atof +#define getopt GNU_getopt +#define GFMT_WORKAROUND +#endif /* NeXT */ + +#if defined(atarist) || defined(VMS) +#include +#endif /* atarist || VMS */ + +#if HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ + +#ifndef HAVE_VPRINTF +/* if you don't have vprintf, try this and cross your fingers. 
*/ +#ifdef HAVE_DOPRNT +#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp)) +#else /* not HAVE_DOPRNT */ +you +lose +#endif /* not HAVE_DOPRNT */ +#endif /* HAVE_VPRINTF */ + +#ifndef HAVE_SETLOCALE +#define setlocale(locale, val) /* nothing */ +#endif /* HAVE_SETLOCALE */ + +#ifdef VMS +#include "vms/redirect.h" +#endif /*VMS*/ + +#ifdef atarist +#include "atari/redirect.h" +#endif + +#define GNU_REGEX +#ifdef GNU_REGEX +#include "regex.h" +#include "dfa.h" +typedef struct Regexp { + struct re_pattern_buffer pat; + struct re_registers regs; + struct dfa dfareg; + int dfa; +} Regexp; +#define RESTART(rp,s) (rp)->regs.start[0] +#define REEND(rp,s) (rp)->regs.end[0] +#define SUBPATSTART(rp,s,n) (rp)->regs.start[n] +#define SUBPATEND(rp,s,n) (rp)->regs.end[n] +#endif /* GNU_REGEX */ + +/* ------------------ Constants, Structures, Typedefs ------------------ */ + +#ifndef AWKNUM +#define AWKNUM double +#endif + +#ifndef TRUE +/* a bit hackneyed, but what the heck */ +#define TRUE 1 +#define FALSE 0 +#endif + +/* Figure out what '\a' really is. */ +#ifdef __STDC__ +#define BELL '\a' /* sure makes life easy, don't it? 
*/ +#else +# if 'z' - 'a' == 25 /* ascii */ +# if 'a' != 97 /* machine is dumb enough to use mark parity */ +# define BELL '\207' +# else +# define BELL '\07' +# endif +# else +# define BELL '\057' +# endif +#endif + +typedef enum nodevals { + /* illegal entry == 0 */ + Node_illegal, + + /* binary operators lnode and rnode are the expressions to work on */ + Node_times, + Node_quotient, + Node_mod, + Node_plus, + Node_minus, + Node_cond_pair, /* conditional pair (see Node_line_range) */ + Node_subscript, + Node_concat, + Node_exp, + + /* unary operators subnode is the expression to work on */ + Node_preincrement, + Node_predecrement, + Node_postincrement, + Node_postdecrement, + Node_unary_minus, + Node_field_spec, + + /* assignments lnode is the var to assign to, rnode is the exp */ + Node_assign, + Node_assign_times, + Node_assign_quotient, + Node_assign_mod, + Node_assign_plus, + Node_assign_minus, + Node_assign_exp, + + /* boolean binaries lnode and rnode are expressions */ + Node_and, + Node_or, + + /* binary relationals compares lnode and rnode */ + Node_equal, + Node_notequal, + Node_less, + Node_greater, + Node_leq, + Node_geq, + Node_match, + Node_nomatch, + + /* unary relationals works on subnode */ + Node_not, + + /* program structures */ + Node_rule_list, /* lnode is a rule, rnode is rest of list */ + Node_rule_node, /* lnode is pattern, rnode is statement */ + Node_statement_list, /* lnode is statement, rnode is more list */ + Node_if_branches, /* lnode is to run on true, rnode on false */ + Node_expression_list, /* lnode is an exp, rnode is more list */ + Node_param_list, /* lnode is a variable, rnode is more list */ + + /* keywords */ + Node_K_if, /* lnode is conditonal, rnode is if_branches */ + Node_K_while, /* lnode is condtional, rnode is stuff to run */ + Node_K_for, /* lnode is for_struct, rnode is stuff to run */ + Node_K_arrayfor, /* lnode is for_struct, rnode is stuff to run */ + Node_K_break, /* no subs */ + Node_K_continue, /* no subs */ + 
Node_K_print, /* lnode is exp_list, rnode is redirect */ + Node_K_printf, /* lnode is exp_list, rnode is redirect */ + Node_K_next, /* no subs */ + Node_K_exit, /* subnode is return value, or NULL */ + Node_K_do, /* lnode is conditional, rnode stuff to run */ + Node_K_return, /* lnode is return value */ + Node_K_delete, /* lnode is array, rnode is subscript */ + Node_K_getline, /* lnode is opt var, rnode is redirection */ + Node_K_function, /* lnode is statement list, rnode is params */ + Node_K_nextfile, /* no subs */ + + /* I/O redirection for print statements */ + Node_redirect_output, /* subnode is where to redirect */ + Node_redirect_append, /* subnode is where to redirect */ + Node_redirect_pipe, /* subnode is where to redirect */ + Node_redirect_pipein, /* subnode is where to redirect */ + Node_redirect_input, /* subnode is where to redirect */ + + /* Variables */ + Node_var, /* rnode is value, lnode is array stuff */ + Node_var_array, /* array is ptr to elements, asize num of eles */ + Node_val, /* node is a value - type in flags */ + + /* Builtins subnode is explist to work on, proc is func to call */ + Node_builtin, + + /* + * pattern: conditional ',' conditional ; lnode of Node_line_range + * is the two conditionals (Node_cond_pair), other word (rnode place) + * is a flag indicating whether or not this range has been entered. + */ + Node_line_range, + + /* + * boolean test of membership in array lnode is string-valued + * expression rnode is array name + */ + Node_in_array, + + Node_func, /* lnode is param. 
list, rnode is body */ + Node_func_call, /* lnode is name, rnode is argument list */ + + Node_cond_exp, /* lnode is conditonal, rnode is if_branches */ + Node_regex, /* a regexp, text, compiled, flags, etc */ + Node_hashnode, /* an identifier in the symbol table */ + Node_ahash, /* an array element */ + Node_NF, /* variables recognized in the grammar */ + Node_NR, + Node_FNR, + Node_FS, + Node_RS, + Node_FIELDWIDTHS, + Node_IGNORECASE, + Node_OFS, + Node_ORS, + Node_OFMT, + Node_CONVFMT, + Node_final /* sentry value, not legal */ +} NODETYPE; + +/* + * NOTE - this struct is a rather kludgey -- it is packed to minimize + * space usage, at the expense of cleanliness. Alter at own risk. + */ +typedef struct exp_node { + union { + struct { + union { + struct exp_node *lptr; + char *param_name; + long ll; + } l; + union { + struct exp_node *rptr; + struct exp_node *(*pptr)(); + Regexp *preg; + struct for_loop_header *hd; + struct exp_node **av; + int r_ent; /* range entered */ + } r; + union { + struct exp_node *extra; + long xl; + } x; + char *name; + short number; + unsigned char reflags; +# define CASE 1 +# define CONST 2 +# define FS_DFLT 4 + } nodep; + struct { + AWKNUM fltnum; /* this is here for optimal packing of + * the structure on many machines + */ + char *sp; + size_t slen; + long sref; + int idx; + } val; + struct { + struct exp_node *next; + char *name; + size_t length; + struct exp_node *value; + } hash; +#define hnext sub.hash.next +#define hname sub.hash.name +#define hlength sub.hash.length +#define hvalue sub.hash.value + struct { + struct exp_node *next; + struct exp_node *name; + struct exp_node *value; + } ahash; +#define ahnext sub.ahash.next +#define ahname sub.ahash.name +#define ahvalue sub.ahash.value + } sub; + NODETYPE type; + unsigned short flags; +# define MALLOC 1 /* can be free'd */ +# define TEMP 2 /* should be free'd */ +# define PERM 4 /* can't be free'd */ +# define STRING 8 /* assigned as string */ +# define STR 16 /* string value 
is current */ +# define NUM 32 /* numeric value is current */ +# define NUMBER 64 /* assigned as number */ +# define MAYBE_NUM 128 /* user input: if NUMERIC then + * a NUMBER */ +# define ARRAYMAXED 256 /* array is at max size */ +# define SCALAR 512 /* used as scalar, can't be array */ +# define FUNC 1024 /* this parameter is really a + * function name; see awk.y */ +# define FIELD 2048 /* this is a field */ + + char *vname; /* variable's name */ +} NODE; + +#define lnode sub.nodep.l.lptr +#define nextp sub.nodep.l.lptr +#define rnode sub.nodep.r.rptr +#define source_file sub.nodep.name +#define source_line sub.nodep.number +#define param_cnt sub.nodep.number +#define param sub.nodep.l.param_name + +#define subnode lnode +#define proc sub.nodep.r.pptr + +#define re_reg sub.nodep.r.preg +#define re_flags sub.nodep.reflags +#define re_text lnode +#define re_exp sub.nodep.x.extra +#define re_cnt sub.nodep.number + +#define forsub lnode +#define forloop rnode->sub.nodep.r.hd + +#define stptr sub.val.sp +#define stlen sub.val.slen +#define stref sub.val.sref +#define stfmt sub.val.idx + +#define numbr sub.val.fltnum + +#define var_value lnode +#define var_array sub.nodep.r.av +#define array_size sub.nodep.l.ll +#define table_size sub.nodep.x.xl + +#define condpair lnode +#define triggered sub.nodep.r.r_ent + +/* a regular for loop */ +typedef struct for_loop_header { + NODE *init; + NODE *cond; + NODE *incr; +} FOR_LOOP_HEADER; + +/* for "for(iggy in foo) {" */ +struct search { + NODE *sym; + size_t idx; + NODE *bucket; + NODE *retval; +}; + +/* for faster input, bypass stdio */ +typedef struct iobuf { + const char *name; + int fd; + char *buf; + char *off; + char *end; + size_t size; /* this will be determined by an fstat() call */ + int cnt; + long secsiz; + int flag; +# define IOP_IS_TTY 1 +# define IOP_IS_INTERNAL 2 +# define IOP_NO_FREE 4 +# define IOP_MMAPPED 8 +# define IOP_NOFREE_OBJ 16 + int (*getrec)(); +} IOBUF; + +typedef void (*Func_ptr)(); + +/* structure 
used to dynamically maintain a linked-list of open files/pipes */ +struct redirect { + unsigned int flag; +# define RED_FILE 1 +# define RED_PIPE 2 +# define RED_READ 4 +# define RED_WRITE 8 +# define RED_APPEND 16 +# define RED_NOBUF 32 +# define RED_USED 64 /* closed temporarily to reuse fd */ +# define RED_EOF 128 + char *value; + FILE *fp; + FILE *ifp; /* input fp, needed for PIPES_SIMULATED */ + IOBUF *iop; + int pid; + int status; + struct redirect *prev; + struct redirect *next; +}; + +/* structure for our source, either a command line string or a source file */ +struct src { + enum srctype { CMDLINE = 1, SOURCEFILE } stype; + char *val; +}; + +/* longjmp return codes, must be nonzero */ +/* Continue means either for loop/while continue, or next input record */ +#define TAG_CONTINUE 1 +/* Break means either for/while break, or stop reading input */ +#define TAG_BREAK 2 +/* Return means return from a function call; leave value in ret_node */ +#define TAG_RETURN 3 + +#ifndef LONG_MAX +#define LONG_MAX ((long)(~(1L << (sizeof (long) * 8 - 1)))) +#endif +#ifndef ULONG_MAX +#define ULONG_MAX (~(unsigned long)0) +#endif +#ifndef LONG_MIN +#define LONG_MIN ((long)(-LONG_MAX - 1L)) +#endif +#define HUGE LONG_MAX + +/* -------------------------- External variables -------------------------- */ +/* gawk builtin variables */ +extern long NF; +extern long NR; +extern long FNR; +extern int IGNORECASE; +extern int RS_is_null; +extern char *OFS; +extern int OFSlen; +extern char *ORS; +extern int ORSlen; +extern char *OFMT; +extern char *CONVFMT; +extern int CONVFMTidx; +extern int OFMTidx; +extern NODE *CONVFMT_node, *FIELDWIDTHS_node, *FILENAME_node; +extern NODE *FNR_node, *FS_node, *IGNORECASE_node, *NF_node; +extern NODE *NR_node, *OFMT_node, *OFS_node, *ORS_node, *RLENGTH_node; +extern NODE *RSTART_node, *RS_node, *RT_node, *SUBSEP_node; +extern NODE **stack_ptr; +extern NODE *Nnull_string; +extern NODE **fields_arr; +extern int sourceline; +extern char *source; 
+extern NODE *expression_value; + +#if __GNUC__ < 2 +extern NODE *_t; /* used as temporary in tree_eval */ +#endif + +extern NODE *nextfree; +extern int field0_valid; +extern int do_traditional; +extern int do_posix; +extern int do_lint; +extern int do_lint_old; +extern int do_intervals; +extern int in_begin_rule; +extern int in_end_rule; + +extern const char *myname; + +extern char quote; +extern char *defpath; +extern char envsep; + +extern char casetable[]; /* for case-independent regexp matching */ + +/* ------------------------- Pseudo-functions ------------------------- */ + +#define is_identchar(c) (isalnum(c) || (c) == '_') +#define isnondecimal(str) (((str)[0]) == '0') + +#ifdef MPROF +#define getnode(n) emalloc(n, NODE *, sizeof(NODE), "getnode") +#define freenode(n) free(n) +#else /* not MPROF */ +#define getnode(n) if (nextfree) n = nextfree, nextfree = nextfree->nextp;\ + else n = more_nodes() +#define freenode(n) ((n)->flags &= ~SCALAR, (n)->nextp = nextfree, nextfree = (n)) +#endif /* not MPROF */ + +#ifdef DEBUG +#undef freenode +#define get_lhs(p, a) r_get_lhs((p), (a)) +#define m_tree_eval(t, iscond) r_tree_eval(t, iscond) +#else +#define get_lhs(p, a) ((p)->type == Node_var ? (&(p)->var_value) : \ + r_get_lhs((p), (a))) +#if __GNUC__ >= 2 +#define m_tree_eval(t, iscond) \ + ({NODE * _t = (t); \ + if (_t == NULL) \ + _t = Nnull_string; \ + else { \ + switch(_t->type) { \ + case Node_val: \ + break; \ + case Node_var: \ + _t = _t->var_value; \ + break; \ + default: \ + _t = r_tree_eval(_t, iscond);\ + break; \ + } \ + } \ + _t;}) +#else +#define m_tree_eval(t, iscond) (_t = (t), _t == NULL ? Nnull_string : \ + (_t->type == Node_param_list ? \ + r_tree_eval(_t, iscond) : \ + (_t->type == Node_val ? _t : \ + (_t->type == Node_var ? 
_t->var_value : \ + r_tree_eval(_t, iscond))))) +#endif /* __GNUC__ */ +#endif /* not DEBUG */ +#define tree_eval(t) m_tree_eval(t, FALSE) + +#define make_number(x) mk_number((x), (unsigned int)(MALLOC|NUM|NUMBER)) +#define tmp_number(x) mk_number((x), (unsigned int)(MALLOC|TEMP|NUM|NUMBER)) + +#define free_temp(n) do { if ((n)->flags&TEMP) { unref(n); }} while (FALSE) +#define make_string(s, l) make_str_node((s), (size_t) (l), FALSE) +#define SCAN 1 +#define ALREADY_MALLOCED 2 + +#define cant_happen() r_fatal("internal error line %d, file: %s", \ + __LINE__, __FILE__); + +#ifdef HAVE_STRINGIZE +#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), #var, strerror(errno)),0)) +#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\ + (MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), #var, strerror(errno)),0)) +#else /* HAVE_STRINGIZE */ +#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), "var", strerror(errno)),0)) +#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\ + (MALLOC_ARG_T)(x))) ||\ + (fatal("%s: %s: can't allocate memory (%s)",\ + (str), "var", strerror(errno)),0)) +#endif /* HAVE_STRINGIZE */ + +#ifdef DEBUG +#define force_number r_force_number +#define force_string r_force_string +#else /* not DEBUG */ +#ifdef lint +extern AWKNUM force_number(); +#endif +#if __GNUC__ >= 2 +#define force_number(n) ({NODE *_tn = (n);\ + (_tn->flags & NUM) ?_tn->numbr : r_force_number(_tn);}) +#define force_string(s) ({NODE *_ts = (s);\ + ((_ts->flags & STR) && \ + (_ts->stfmt == -1 || _ts->stfmt == CONVFMTidx)) ?\ + _ts : r_force_string(_ts);}) +#else +#ifdef MSDOS +extern double _msc51bug; +#define force_number(n) (_msc51bug=(_t = (n),\ + (_t->flags & NUM) ? 
_t->numbr : r_force_number(_t))) +#else /* not MSDOS */ +#define force_number(n) (_t = (n),\ + (_t->flags & NUM) ? _t->numbr : r_force_number(_t)) +#endif /* not MSDOS */ +#define force_string(s) (_t = (s),((_t->flags & STR) && \ + (_t->stfmt == -1 || \ + _t->stfmt == CONVFMTidx))? \ + _t : r_force_string(_t)) +#endif /* not __GNUC__ */ +#endif /* not DEBUG */ + +#define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0) +#define STREQN(a,b,n) ((n) && *(a)== *(b) && \ + strncmp((a), (b), (size_t) (n)) == 0) + +#define fatal set_loc(__FILE__, __LINE__), r_fatal + +/* ------------- Function prototypes or defs (as appropriate) ------------- */ + +/* array.c */ +extern NODE *concat_exp P((NODE *tree)); +extern void assoc_clear P((NODE *symbol)); +extern unsigned int hash P((const char *s, size_t len, unsigned long hsize)); +extern int in_array P((NODE *symbol, NODE *subs)); +extern NODE **assoc_lookup P((NODE *symbol, NODE *subs)); +extern void do_delete P((NODE *symbol, NODE *tree)); +extern void assoc_scan P((NODE *symbol, struct search *lookat)); +extern void assoc_next P((struct search *lookat)); +/* awktab.c */ +extern char *tokexpand P((void)); +extern char nextc P((void)); +extern NODE *node P((NODE *left, NODETYPE op, NODE *right)); +extern NODE *install P((char *name, NODE *value)); +extern NODE *lookup P((const char *name)); +extern NODE *variable P((char *name, int can_free, NODETYPE type)); +extern int yyparse P((void)); +/* builtin.c */ +extern double double_to_int P((double d)); +extern NODE *do_exp P((NODE *tree)); +extern NODE *do_fflush P((NODE *tree)); +extern NODE *do_index P((NODE *tree)); +extern NODE *do_int P((NODE *tree)); +extern NODE *do_length P((NODE *tree)); +extern NODE *do_log P((NODE *tree)); +extern NODE *do_sprintf P((NODE *tree)); +extern void do_printf P((NODE *tree)); +extern void print_simple P((NODE *tree, FILE *fp)); +extern NODE *do_sqrt P((NODE *tree)); +extern NODE *do_substr P((NODE *tree)); +extern NODE *do_strftime P((NODE 
*tree)); +extern NODE *do_systime P((NODE *tree)); +extern NODE *do_system P((NODE *tree)); +extern void do_print P((NODE *tree)); +extern NODE *do_tolower P((NODE *tree)); +extern NODE *do_toupper P((NODE *tree)); +extern NODE *do_atan2 P((NODE *tree)); +extern NODE *do_sin P((NODE *tree)); +extern NODE *do_cos P((NODE *tree)); +extern NODE *do_rand P((NODE *tree)); +extern NODE *do_srand P((NODE *tree)); +extern NODE *do_match P((NODE *tree)); +extern NODE *do_gsub P((NODE *tree)); +extern NODE *do_sub P((NODE *tree)); +extern NODE *do_gensub P((NODE *tree)); +#ifdef BITOPS +extern NODE *do_lshift P((NODE *tree)); +extern NODE *do_rshift P((NODE *tree)); +extern NODE *do_and P((NODE *tree)); +extern NODE *do_or P((NODE *tree)); +extern NODE *do_xor P((NODE *tree)); +extern NODE *do_compl P((NODE *tree)); +extern NODE *do_strtonum P((NODE *tree)); +#endif /* BITOPS */ +#if defined(BITOPS) || defined(NONDECDATA) +extern AWKNUM nondec2awknum P((char *str, size_t len)); +#endif /* defined(BITOPS) || defined(NONDECDATA) */ +/* eval.c */ +extern int interpret P((NODE *volatile tree)); +extern NODE *r_tree_eval P((NODE *tree, int iscond)); +extern int cmp_nodes P((NODE *t1, NODE *t2)); +extern NODE **r_get_lhs P((NODE *ptr, Func_ptr *assign)); +extern void set_IGNORECASE P((void)); +void set_OFS P((void)); +void set_ORS P((void)); +void set_OFMT P((void)); +void set_CONVFMT P((void)); +/* field.c */ +extern void init_fields P((void)); +extern void set_record P((char *buf, int cnt, int freeold)); +extern void reset_record P((void)); +extern void set_NF P((void)); +extern NODE **get_field P((long num, Func_ptr *assign)); +extern NODE *do_split P((NODE *tree)); +extern void set_FS P((void)); +extern void set_FS_if_not_FIELDWIDTHS P((void)); +extern void set_RS P((void)); +extern void set_FIELDWIDTHS P((void)); +extern int using_fieldwidths P((void)); +/* gawkmisc.c */ +extern char *gawk_name P((const char *filespec)); +extern void os_arg_fixup P((int *argcp, char 
***argvp)); +extern int os_devopen P((const char *name, int flag)); +extern int optimal_bufsize P((int fd, struct stat *sbuf)); +extern int ispath P((const char *file)); +extern int isdirpunct P((int c)); +/* io.c */ +extern void set_FNR P((void)); +extern void set_NR P((void)); +extern void do_input P((void)); +extern struct redirect *redirect P((NODE *tree, int *errflg)); +extern NODE *do_close P((NODE *tree)); +extern int flush_io P((void)); +extern int close_io P((void)); +extern int devopen P((const char *name, const char *mode)); +extern int pathopen P((const char *file)); +extern NODE *do_getline P((NODE *tree)); +extern void do_nextfile P((void)); +extern struct redirect *getredirect P((char *str, int len)); +/* main.c */ +extern int main P((int argc, char **argv)); +extern void load_environ P((void)); +extern char *arg_assign P((char *arg)); +extern RETSIGTYPE catchsig P((int sig, int code)); +/* msg.c */ +extern void err P((const char *s, const char *emsg, va_list argp)); +#if _MSC_VER == 510 +extern void msg P((va_list va_alist, ...)); +extern void error P((va_list va_alist, ...)); +extern void warning P((va_list va_alist, ...)); +extern void set_loc P((char *file, int line)); +extern void r_fatal P((va_list va_alist, ...)); +#else +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +extern void msg (char *mesg, ...); +extern void error (char *mesg, ...); +extern void warning (char *mesg, ...); +extern void set_loc (char *file, int line); +extern void r_fatal (char *mesg, ...); +#else +extern void msg (); +extern void error (); +extern void warning (); +extern void set_loc (); +extern void r_fatal (); +#endif +#endif +/* node.c */ +extern AWKNUM r_force_number P((NODE *n)); +extern NODE *format_val P((char *format, int index, NODE *s)); +extern NODE *r_force_string P((NODE *s)); +extern NODE *dupnode P((NODE *n)); +extern NODE *mk_number P((AWKNUM x, unsigned int flags)); +extern NODE *make_str_node P((char *s, size_t len, int scan )); +extern 
NODE *tmp_string P((char *s, size_t len )); +extern NODE *more_nodes P((void)); +#ifdef DEBUG +extern void freenode P((NODE *it)); +#endif +extern void unref P((NODE *tmp)); +extern int parse_escape P((char **string_ptr)); +/* re.c */ +extern Regexp *make_regexp P((char *s, size_t len, int ignorecase, int dfa)); +extern int research P((Regexp *rp, char *str, int start, + size_t len, int need_start)); +extern void refree P((Regexp *rp)); +extern void reg_error P((const char *s)); +extern Regexp *re_update P((NODE *t)); +extern void resyntax P((int syntax)); +extern void resetup P((void)); +extern int avoid_dfa P((NODE *re, char *str, size_t len)); /* temporary */ + +/* strncasecmp.c */ +extern int strncasecmp P((const char *s1, const char *s2, register size_t n)); + +#if defined(atarist) +#if defined(PIPES_SIMULATED) +/* atari/tmpnam.c */ +extern char *tmpnam P((char *buf)); +extern char *tempnam P((const char *path, const char *base)); +#else +#include +#endif +#include +#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1) +#else +#define INVALID_HANDLE (-1) +#endif /* atarist */ + +#ifndef STATIC +#define STATIC static +#endif + +#ifdef C_ALLOCA +/* The __hpux check is to avoid conflicts with bison's definition of + alloca() in awktab.c.*/ +#if (defined(__STDC__) && __STDC__) || defined (__hpux) +extern void *alloca P((unsigned)); +#else +extern char *alloca P((unsigned)); +#endif +#endif diff --git a/contrib/awk/awk.y b/contrib/awk/awk.y new file mode 100644 index 0000000..1b9a89b --- /dev/null +++ b/contrib/awk/awk.y @@ -0,0 +1,2434 @@ +/* + * awk.y --- yacc/bison parser + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. 
+ * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +%{ +#ifdef DEBUG +#define YYDEBUG 12 +#endif + +#include "awk.h" + +#define CAN_FREE TRUE +#define DONT_FREE FALSE + +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +static void yyerror(const char *m, ...) ; +#else +static void yyerror(); /* va_alist */ +#endif +static char *get_src_buf P((void)); +static int yylex P((void)); +static NODE *node_common P((NODETYPE op)); +static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); +static NODE *mkrangenode P((NODE *cpair)); +static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); +static NODE *append_right P((NODE *list, NODE *new)); +static void func_install P((NODE *params, NODE *def)); +static void pop_var P((NODE *np, int freeit)); +static void pop_params P((NODE *params)); +static NODE *make_param P((char *name)); +static NODE *mk_rexp P((NODE *exp)); +static int dup_parms P((NODE *func)); +static void param_sanity P((NODE *arglist)); +static int isnoeffect P((NODETYPE t)); +static int isassignable P((NODE *n)); + +enum defref { FUNC_DEFINE, FUNC_USE }; +static void func_use P((char *name, enum defref how)); +static void check_funcs P((void)); + +static int want_assign; /* lexical scanning kludge */ +static int want_regexp; /* lexical scanning kludge */ +static int can_return; /* 
lexical scanning kludge */ +static int io_allowed = TRUE; /* lexical scanning kludge */ +static char *lexptr; /* pointer to next char during parsing */ +static char *lexend; +static char *lexptr_begin; /* keep track of where we were for error msgs */ +static char *lexeme; /* beginning of lexeme for debugging */ +static char *thisline = NULL; +#define YYDEBUG_LEXER_TEXT (lexeme) +static int param_counter; +static char *tokstart = NULL; +static char *tok = NULL; +static char *tokend; + +#define HASHSIZE 1021 /* this constant only used here */ +NODE *variables[HASHSIZE]; + +extern char *source; +extern int sourceline; +extern struct src *srcfiles; +extern int numfiles; +extern int errcount; +extern NODE *begin_block; +extern NODE *end_block; +%} + +%union { + long lval; + AWKNUM fval; + NODE *nodeval; + NODETYPE nodetypeval; + char *sval; + NODE *(*ptrval)(); +} + +%type function_prologue function_body +%type rexp exp start program rule simp_exp +%type non_post_simp_exp +%type pattern +%type action variable param_list +%type rexpression_list opt_rexpression_list +%type expression_list opt_expression_list +%type statements statement if_statement opt_param_list +%type opt_exp opt_variable regexp +%type input_redir output_redir +%type print +%type func_name +%type lex_builtin + +%token FUNC_CALL NAME REGEXP +%token ERROR +%token YNUMBER YSTRING +%token RELOP APPEND_OP +%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP +%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE +%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE +%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION +%token LEX_GETLINE LEX_NEXTFILE +%token LEX_IN +%token LEX_AND LEX_OR INCREMENT DECREMENT +%token LEX_BUILTIN LEX_LENGTH + +/* these are just yylval numbers */ + +/* Lowest to highest */ +%right ASSIGNOP +%right '?' 
':' +%left LEX_OR +%left LEX_AND +%left LEX_GETLINE +%nonassoc LEX_IN +%left FUNC_CALL LEX_BUILTIN LEX_LENGTH +%nonassoc ',' +%nonassoc MATCHOP +%nonassoc RELOP '<' '>' '|' APPEND_OP +%left CONCAT_OP +%left YSTRING YNUMBER +%left '+' '-' +%left '*' '/' '%' +%right '!' UNARY +%right '^' +%left INCREMENT DECREMENT +%left '$' +%left '(' ')' +%% + +start + : opt_nls program opt_nls + { + expression_value = $2; + check_funcs(); + } + ; + +program + : rule + { + if ($1 != NULL) + $$ = $1; + else + $$ = NULL; + yyerrok; + } + | program rule + /* add the rule to the tail of list */ + { + if ($2 == NULL) + $$ = $1; + else if ($1 == NULL) + $$ = $2; + else { + if ($1->type != Node_rule_list) + $1 = node($1, Node_rule_list, + (NODE*) NULL); + $$ = append_right($1, + node($2, Node_rule_list, (NODE *) NULL)); + } + yyerrok; + } + | error { $$ = NULL; } + | program error { $$ = NULL; } + | /* empty */ { $$ = NULL; } + ; + +rule + : LEX_BEGIN { io_allowed = FALSE; } + action + { + if (begin_block != NULL) { + if (begin_block->type != Node_rule_list) + begin_block = node(begin_block, Node_rule_list, + (NODE *) NULL); + (void) append_right(begin_block, node( + node((NODE *) NULL, Node_rule_node, $3), + Node_rule_list, (NODE *) NULL) ); + } else + begin_block = node((NODE *) NULL, Node_rule_node, $3); + $$ = NULL; + io_allowed = TRUE; + yyerrok; + } + | LEX_END { io_allowed = FALSE; } + action + { + if (end_block != NULL) { + if (end_block->type != Node_rule_list) + end_block = node(end_block, Node_rule_list, + (NODE *) NULL); + (void) append_right (end_block, node( + node((NODE *) NULL, Node_rule_node, $3), + Node_rule_list, (NODE *) NULL)); + } else + end_block = node((NODE *) NULL, Node_rule_node, $3); + $$ = NULL; + io_allowed = TRUE; + yyerrok; + } + | LEX_BEGIN statement_term + { + warning("BEGIN blocks must have an action part"); + errcount++; + yyerrok; + } + | LEX_END statement_term + { + warning("END blocks must have an action part"); + errcount++; + yyerrok; + } + | 
pattern action + { $$ = node($1, Node_rule_node, $2); yyerrok; } + | action + { $$ = node((NODE *) NULL, Node_rule_node, $1); yyerrok; } + | pattern statement_term + { + $$ = node($1, + Node_rule_node, + node(node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL), + Node_K_print, + (NODE *) NULL)); + yyerrok; + } + | function_prologue function_body + { + func_install($1, $2); + $$ = NULL; + yyerrok; + } + ; + +func_name + : NAME + { $$ = $1; } + | FUNC_CALL + { $$ = $1; } + | lex_builtin + { + yyerror("%s() is a built-in function, it cannot be redefined", + tokstart); + errcount++; + /* yyerrok; */ + } + ; + +lex_builtin + : LEX_BUILTIN + | LEX_LENGTH + ; + +function_prologue + : LEX_FUNCTION + { + param_counter = 0; + } + func_name '(' opt_param_list r_paren opt_nls + { + NODE *t; + + t = make_param($3); + t->flags |= FUNC; + $$ = append_right(t, $5); + can_return = TRUE; + /* check for duplicate parameter names */ + if (dup_parms($$)) + errcount++; + } + ; + +function_body + : l_brace statements r_brace opt_semi + { + $$ = $2; + can_return = FALSE; + } + | l_brace r_brace opt_semi opt_nls + { + $$ = node((NODE *) NULL, Node_K_return, (NODE *) NULL); + can_return = FALSE; + } + ; + + +pattern + : exp + { $$ = $1; } + | exp ',' exp + { $$ = mkrangenode(node($1, Node_cond_pair, $3)); } + ; + +regexp + /* + * In this rule, want_regexp tells yylex that the next thing + * is a regexp so it should read up to the closing slash. 
+ */ + : '/' + { ++want_regexp; } + REGEXP '/' + { + NODE *n; + size_t len; + + getnode(n); + n->type = Node_regex; + len = strlen($3); + n->re_exp = make_string($3, len); + n->re_reg = make_regexp($3, len, FALSE, TRUE); + n->re_text = NULL; + n->re_flags = CONST; + n->re_cnt = 1; + $$ = n; + } + ; + +action + : l_brace statements r_brace opt_semi opt_nls + { $$ = $2; } + | l_brace r_brace opt_semi opt_nls + { $$ = NULL; } + ; + +statements + : statement + { + $$ = $1; + if (do_lint && isnoeffect($$->type)) + warning("statement may have no effect"); + } + | statements statement + { + if ($1 == NULL || $1->type != Node_statement_list) + $1 = node($1, Node_statement_list, (NODE *) NULL); + $$ = append_right($1, + node($2, Node_statement_list, (NODE *) NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | statements error + { $$ = NULL; } + ; + +statement_term + : nls + | semi opt_nls + ; + +statement + : semi opt_nls + { $$ = NULL; } + | l_brace r_brace + { $$ = NULL; } + | l_brace statements r_brace + { $$ = $2; } + | if_statement + { $$ = $1; } + | LEX_WHILE '(' exp r_paren opt_nls statement + { $$ = node($3, Node_K_while, $6); } + | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls + { $$ = node($6, Node_K_do, $3); } + | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement + { + $$ = node($8, Node_K_arrayfor, + make_for_loop(variable($3, CAN_FREE, Node_var), + (NODE *) NULL, variable($5, CAN_FREE, Node_var_array))); + } + | LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement + { + $$ = node($10, Node_K_for, (NODE *) make_for_loop($3, $5, $7)); + } + | LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement + { + $$ = node($9, Node_K_for, + (NODE *) make_for_loop($3, (NODE *) NULL, $6)); + } + | LEX_BREAK statement_term + /* for break, maybe we'll have to remember where to break to */ + { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); } + | LEX_CONTINUE statement_term + /* similarly */ + { $$ = node((NODE *) NULL, 
Node_K_continue, (NODE *) NULL); } + | print '(' expression_list r_paren output_redir statement_term + { $$ = node($3, $1, $5); } + | print opt_rexpression_list output_redir statement_term + { + if ($1 == Node_K_print && $2 == NULL) { + static int warned = FALSE; + + $2 = node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL); + + if (do_lint && ! io_allowed && ! warned) { + warned = TRUE; + warning( + "plain `print' in BEGIN or END rule should probably be `print \"\"'"); + } + } + + $$ = node($2, $1, $3); + } + | LEX_NEXT opt_exp statement_term + { NODETYPE type; + + if ($2) { + if ($2 == lookup("file")) { + static int warned = FALSE; + + if (! warned) { + warned = TRUE; + warning("`next file' is obsolete; use `nextfile'"); + } + if (do_lint) + warning("`next file' is a gawk extension"); + if (do_traditional) { + /* + * can't use yyerror, since may have overshot + * the source line + */ + errcount++; + error("`next file' is a gawk extension"); + } + if (! io_allowed) { + /* same thing */ + errcount++; + error("`next file' used in BEGIN or END action"); + } + type = Node_K_nextfile; + } else { + errcount++; + error("illegal expression after `next'"); + type = Node_K_next; /* sanity */ + } + } else { + if (! io_allowed) + yyerror("`next' used in BEGIN or END action"); + type = Node_K_next; + } + $$ = node((NODE *) NULL, type, (NODE *) NULL); + } + | LEX_NEXTFILE statement_term + { + if (do_lint) + warning("`nextfile' is a gawk extension"); + if (do_traditional) { + /* + * can't use yyerror, since may have overshot + * the source line + */ + errcount++; + error("`nextfile' is a gawk extension"); + } + if (! io_allowed) { + /* same thing */ + errcount++; + error("`nextfile' used in BEGIN or END action"); + } + $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL); + } + | LEX_EXIT opt_exp statement_term + { $$ = node($2, Node_K_exit, (NODE *) NULL); } + | LEX_RETURN + { + if (! 
can_return) + yyerror("`return' used outside function context"); + } + opt_exp statement_term + { $$ = node($3, Node_K_return, (NODE *) NULL); } + | LEX_DELETE NAME '[' expression_list ']' statement_term + { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); } + | LEX_DELETE NAME statement_term + { + if (do_lint) + warning("`delete array' is a gawk extension"); + if (do_traditional) { + /* + * can't use yyerror, since may have overshot + * the source line + */ + errcount++; + error("`delete array' is a gawk extension"); + } + $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); + } + | exp statement_term + { $$ = $1; } + ; + +print + : LEX_PRINT + { $$ = $1; } + | LEX_PRINTF + { $$ = $1; } + ; + +if_statement + : LEX_IF '(' exp r_paren opt_nls statement + { + $$ = node($3, Node_K_if, + node($6, Node_if_branches, (NODE *) NULL)); + } + | LEX_IF '(' exp r_paren opt_nls statement + LEX_ELSE opt_nls statement + { $$ = node($3, Node_K_if, + node($6, Node_if_branches, $9)); } + ; + +nls + : NEWLINE + { want_assign = FALSE; } + | nls NEWLINE + ; + +opt_nls + : /* empty */ + | nls + ; + +input_redir + : /* empty */ + { $$ = NULL; } + | '<' simp_exp + { $$ = node($2, Node_redirect_input, (NODE *) NULL); } + ; + +output_redir + : /* empty */ + { $$ = NULL; } + | '>' exp + { $$ = node($2, Node_redirect_output, (NODE *) NULL); } + | APPEND_OP exp + { $$ = node($2, Node_redirect_append, (NODE *) NULL); } + | '|' exp + { $$ = node($2, Node_redirect_pipe, (NODE *) NULL); } + ; + +opt_param_list + : /* empty */ + { $$ = NULL; } + | param_list + { $$ = $1; } + ; + +param_list + : NAME + { $$ = make_param($1); } + | param_list comma NAME + { $$ = append_right($1, make_param($3)); yyerrok; } + | error + { $$ = NULL; } + | param_list error + { $$ = NULL; } + | param_list comma error + { $$ = NULL; } + ; + +/* optional expression, as in for loop */ +opt_exp + : /* empty */ + { $$ = NULL; } + | exp + { $$ = $1; } + ; + +opt_rexpression_list 
+ : /* empty */ + { $$ = NULL; } + | rexpression_list + { $$ = $1; } + ; + +rexpression_list + : rexp + { $$ = node($1, Node_expression_list, (NODE *) NULL); } + | rexpression_list comma rexp + { + $$ = append_right($1, + node($3, Node_expression_list, (NODE *) NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | rexpression_list error + { $$ = NULL; } + | rexpression_list error rexp + { $$ = NULL; } + | rexpression_list comma error + { $$ = NULL; } + ; + +opt_expression_list + : /* empty */ + { $$ = NULL; } + | expression_list + { $$ = $1; } + ; + +expression_list + : exp + { $$ = node($1, Node_expression_list, (NODE *) NULL); } + | expression_list comma exp + { + $$ = append_right($1, + node($3, Node_expression_list, (NODE *) NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | expression_list error + { $$ = NULL; } + | expression_list error exp + { $$ = NULL; } + | expression_list comma error + { $$ = NULL; } + ; + +/* Expressions, not including the comma operator. */ +exp : variable ASSIGNOP + { want_assign = FALSE; } + exp + { + if (do_lint && $4->type == Node_regex) + warning("Regular expression on left of assignment."); + $$ = node($1, $2, $4); + } + | '(' expression_list r_paren LEX_IN NAME + { $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); } + | exp '|' LEX_GETLINE opt_variable + { + $$ = node($4, Node_K_getline, + node($1, Node_redirect_pipein, (NODE *) NULL)); + } + | LEX_GETLINE opt_variable input_redir + { + if (do_lint && ! 
io_allowed && $3 == NULL) + warning("non-redirected getline undefined inside BEGIN or END action"); + $$ = node($2, Node_K_getline, $3); + } + | exp LEX_AND exp + { $$ = node($1, Node_and, $3); } + | exp LEX_OR exp + { $$ = node($1, Node_or, $3); } + | exp MATCHOP exp + { + if ($1->type == Node_regex) + warning("Regular expression on left of MATCH operator."); + $$ = node($1, $2, mk_rexp($3)); + } + | regexp + { + $$ = $1; + if (do_lint && tokstart[0] == '*') { + /* possible C comment */ + int n = strlen(tokstart) - 1; + if (tokstart[n] == '*') + warning("regexp looks like a C comment, but is not"); + } + } + | '!' regexp %prec UNARY + { + $$ = node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_nomatch, + $2); + } + | exp LEX_IN NAME + { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); } + | exp RELOP exp + { + if (do_lint && $3->type == Node_regex) + warning("Regular expression on left of comparison."); + $$ = node($1, $2, $3); + } + | exp '<' exp + { $$ = node($1, Node_less, $3); } + | exp '>' exp + { $$ = node($1, Node_greater, $3); } + | exp '?' exp ':' exp + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | simp_exp + { $$ = $1; } + | exp simp_exp %prec CONCAT_OP + { $$ = node($1, Node_concat, $2); } + ; + +rexp + : variable ASSIGNOP + { want_assign = FALSE; } + rexp + { $$ = node($1, $2, $4); } + | rexp LEX_AND rexp + { $$ = node($1, Node_and, $3); } + | rexp LEX_OR rexp + { $$ = node($1, Node_or, $3); } + | LEX_GETLINE opt_variable input_redir + { + if (do_lint && ! io_allowed && $3 == NULL) + warning("non-redirected getline undefined inside BEGIN or END action"); + $$ = node($2, Node_K_getline, $3); + } + | regexp + { $$ = $1; } + | '!' 
regexp %prec UNARY + { $$ = node((NODE *) NULL, Node_nomatch, $2); } + | rexp MATCHOP rexp + { $$ = node($1, $2, mk_rexp($3)); } + | rexp LEX_IN NAME + { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); } + | rexp RELOP rexp + { $$ = node($1, $2, $3); } + | rexp '?' rexp ':' rexp + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | simp_exp + { $$ = $1; } + | rexp simp_exp %prec CONCAT_OP + { $$ = node($1, Node_concat, $2); } + ; + +simp_exp + : non_post_simp_exp + /* Binary operators in order of decreasing precedence. */ + | simp_exp '^' simp_exp + { $$ = node($1, Node_exp, $3); } + | simp_exp '*' simp_exp + { $$ = node($1, Node_times, $3); } + | simp_exp '/' simp_exp + { $$ = node($1, Node_quotient, $3); } + | simp_exp '%' simp_exp + { $$ = node($1, Node_mod, $3); } + | simp_exp '+' simp_exp + { $$ = node($1, Node_plus, $3); } + | simp_exp '-' simp_exp + { $$ = node($1, Node_minus, $3); } + | variable INCREMENT + { $$ = node($1, Node_postincrement, (NODE *) NULL); } + | variable DECREMENT + { $$ = node($1, Node_postdecrement, (NODE *) NULL); } + ; + +non_post_simp_exp + : '!' 
simp_exp %prec UNARY + { $$ = node($2, Node_not, (NODE *) NULL); } + | '(' exp r_paren + { $$ = $2; } + | LEX_BUILTIN + '(' opt_expression_list r_paren + { $$ = snode($3, Node_builtin, (int) $1); } + | LEX_LENGTH '(' opt_expression_list r_paren + { $$ = snode($3, Node_builtin, (int) $1); } + | LEX_LENGTH + { + if (do_lint) + warning("call of `length' without parentheses is not portable"); + $$ = snode((NODE *) NULL, Node_builtin, (int) $1); + if (do_posix) + warning("call of `length' without parentheses is deprecated by POSIX"); + } + | FUNC_CALL '(' opt_expression_list r_paren + { + $$ = node($3, Node_func_call, make_string($1, strlen($1))); + func_use($1, FUNC_USE); + param_sanity($3); + free($1); + } + | variable + | INCREMENT variable + { $$ = node($2, Node_preincrement, (NODE *) NULL); } + | DECREMENT variable + { $$ = node($2, Node_predecrement, (NODE *) NULL); } + | YNUMBER + { $$ = $1; } + | YSTRING + { $$ = $1; } + + | '-' simp_exp %prec UNARY + { + if ($2->type == Node_val) { + $2->numbr = -(force_number($2)); + $$ = $2; + } else + $$ = node($2, Node_unary_minus, (NODE *) NULL); + } + | '+' simp_exp %prec UNARY + { + /* + * was: $$ = $2 + * POSIX semantics: force a conversion to numeric type + */ + $$ = node (make_number(0.0), Node_plus, $2); + } + ; + +opt_variable + : /* empty */ + { $$ = NULL; } + | variable + { $$ = $1; } + ; + +variable + : NAME + { $$ = variable($1, CAN_FREE, Node_var); } + | NAME '[' expression_list ']' + { + if ($3->rnode == NULL) { + $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode); + freenode($3); + } else + $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3); + } + | '$' non_post_simp_exp + { $$ = node($2, Node_field_spec, (NODE *) NULL); } + ; + +l_brace + : '{' opt_nls + ; + +r_brace + : '}' opt_nls { yyerrok; } + ; + +r_paren + : ')' { yyerrok; } + ; + +opt_semi + : /* empty */ + | semi + ; + +semi + : ';' { yyerrok; want_assign = FALSE; } + ; + +comma : ',' opt_nls { yyerrok; } 
+ ; + +%% + +struct token { + const char *operator; /* text to match */ + NODETYPE value; /* node type */ + int class; /* lexical class */ + unsigned flags; /* # of args. allowed and compatability */ +# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ +# define A(n) (1<<(n)) +# define VERSION 0xFF00 /* old awk is zero */ +# define NOT_OLD 0x0100 /* feature not in old awk */ +# define NOT_POSIX 0x0200 /* feature not in POSIX */ +# define GAWKX 0x0400 /* gawk extension */ +# define RESX 0x0800 /* Bell Labs Research extension */ + NODE *(*ptr)(); /* function that implements this keyword */ +}; + +extern NODE + *do_exp(), *do_getline(), *do_index(), *do_length(), + *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(), + *do_split(), *do_system(), *do_int(), *do_close(), + *do_atan2(), *do_sin(), *do_cos(), *do_rand(), + *do_srand(), *do_match(), *do_tolower(), *do_toupper(), + *do_sub(), *do_gsub(), *do_strftime(), *do_systime(), + *do_fflush(); + +/* Tokentab is sorted ascii ascending order, so it can be binary searched. 
*/ + +static struct token tokentab[] = { +{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, +{"END", Node_illegal, LEX_END, 0, 0}, +#ifdef BITOPS +{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and}, +#endif /* BITOPS */ +{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, +{"break", Node_K_break, LEX_BREAK, 0, 0}, +{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close}, +#ifdef BITOPS +{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl}, +#endif /* BITOPS */ +{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, +{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, +{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, +{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, +{"else", Node_illegal, LEX_ELSE, 0, 0}, +{"exit", Node_K_exit, LEX_EXIT, 0, 0}, +{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, +{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush}, +{"for", Node_K_for, LEX_FOR, 0, 0}, +{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, +{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, +{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, +{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, +{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"if", Node_K_if, LEX_IF, 0, 0}, +{"in", Node_illegal, LEX_IN, 0, 0}, +{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, +{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, +{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, +{"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, +#ifdef BITOPS +{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift}, +#endif /* BITOPS */ +{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match}, +{"next", Node_K_next, LEX_NEXT, 0, 0}, +{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0}, +#ifdef BITOPS +{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or}, +#endif /* BITOPS */ +{"print", Node_K_print, LEX_PRINT, 0, 0}, +{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, +{"rand", Node_builtin, LEX_BUILTIN, 
NOT_OLD|A(0), do_rand}, +{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, +#ifdef BITOPS +{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift}, +#endif /* BITOPS */ +{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, +{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split}, +{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, +{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, +{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, +{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime}, +#ifdef BITOPS +{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, +#endif /* BITOPS */ +{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, +{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, +{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, +{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, +{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, +{"while", Node_K_while, LEX_WHILE, 0, 0}, +#ifdef BITOPS +{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor}, +#endif /* BITOPS */ +}; + +/* yyerror --- print a syntax error message, show where */ + +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +static void +yyerror(const char *m, ...) 
+#else +/* VARARGS0 */ +static void +yyerror(va_alist) +va_dcl +#endif +{ + va_list args; + const char *mesg = NULL; + register char *bp, *cp; + char *scan; + char buf[120]; + static char end_of_file_line[] = "(END OF FILE)"; + + errcount++; + /* Find the current line in the input file */ + if (lexptr && lexeme) { + if (thisline == NULL) { + cp = lexeme; + if (*cp == '\n') { + cp--; + mesg = "unexpected newline"; + } + for (; cp != lexptr_begin && *cp != '\n'; --cp) + continue; + if (*cp == '\n') + cp++; + thisline = cp; + } + /* NL isn't guaranteed */ + bp = lexeme; + while (bp < lexend && *bp && *bp != '\n') + bp++; + } else { + thisline = end_of_file_line; + bp = thisline + strlen(thisline); + } + msg("%.*s", (int) (bp - thisline), thisline); + bp = buf; + cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */ + if (lexptr != NULL) { + scan = thisline; + while (bp < cp && scan < lexeme) + if (*scan++ == '\t') + *bp++ = '\t'; + else + *bp++ = ' '; + *bp++ = '^'; + *bp++ = ' '; + } +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ + va_start(args, m); + if (mesg == NULL) + mesg = m; +#else + va_start(args); + if (mesg == NULL) + mesg = va_arg(args, char *); +#endif + strcpy(bp, mesg); + err("", buf, args); + va_end(args); +} + +/* get_src_buf --- read the next buffer of source program */ + +static char * +get_src_buf() +{ + static int samefile = FALSE; + static int nextfile = 0; + static char *buf = NULL; + static int fd; + int n; + register char *scan; + static int len = 0; + static int did_newline = FALSE; + int newfile; + struct stat sbuf; + +# define SLOP 128 /* enough space to hold most source lines */ + +again: + newfile = FALSE; + if (nextfile > numfiles) + return NULL; + + if (srcfiles[nextfile].stype == CMDLINE) { + if (len == 0) { + len = strlen(srcfiles[nextfile].val); + if (len == 0) { + /* + * Yet Another Special case: + * gawk '' /path/name + * Sigh. + */ + static int warned = FALSE; + + if (do_lint && ! 
warned) { + warned = TRUE; + warning("empty program text on command line"); + } + ++nextfile; + goto again; + } + sourceline = 1; + lexptr = lexptr_begin = srcfiles[nextfile].val; + lexend = lexptr + len; + } else if (! did_newline && *(lexptr-1) != '\n') { + /* + * The following goop is to ensure that the source + * ends with a newline and that the entire current + * line is available for error messages. + */ + int offset; + + did_newline = TRUE; + offset = lexptr - lexeme; + for (scan = lexeme; scan > lexptr_begin; scan--) + if (*scan == '\n') { + scan++; + break; + } + len = lexptr - scan; + emalloc(buf, char *, len+1, "get_src_buf"); + memcpy(buf, scan, len); + thisline = buf; + lexptr = buf + len; + *lexptr = '\n'; + lexeme = lexptr - offset; + lexptr_begin = buf; + lexend = lexptr + 1; + } else { + len = 0; + lexeme = lexptr = lexptr_begin = NULL; + } + if (lexptr == NULL && ++nextfile <= numfiles) + goto again; + return lexptr; + } + if (! samefile) { + source = srcfiles[nextfile].val; + if (source == NULL) { + if (buf != NULL) { + free(buf); + buf = NULL; + } + len = 0; + return lexeme = lexptr = lexptr_begin = NULL; + } + fd = pathopen(source); + if (fd <= INVALID_HANDLE) { + char *in; + + /* suppress file name and line no. 
in error mesg */ + in = source; + source = NULL; + fatal("can't open source file \"%s\" for reading (%s)", + in, strerror(errno)); + } + len = optimal_bufsize(fd, & sbuf); + newfile = TRUE; + if (buf != NULL) + free(buf); + emalloc(buf, char *, len + SLOP, "get_src_buf"); + lexptr_begin = buf + SLOP; + samefile = TRUE; + sourceline = 1; + } else { + /* + * Here, we retain the current source line (up to length SLOP) + * in the beginning of the buffer that was overallocated above + */ + int offset; + int linelen; + + offset = lexptr - lexeme; + for (scan = lexeme; scan > lexptr_begin; scan--) + if (*scan == '\n') { + scan++; + break; + } + linelen = lexptr - scan; + if (linelen > SLOP) + linelen = SLOP; + thisline = buf + SLOP - linelen; + memcpy(thisline, scan, linelen); + lexeme = buf + SLOP - offset; + lexptr_begin = thisline; + } + n = read(fd, buf + SLOP, len); + if (n == -1) + fatal("can't read sourcefile \"%s\" (%s)", + source, strerror(errno)); + if (n == 0) { + if (newfile) { + static int warned = FALSE; + + if (do_lint && ! warned) { + warned = TRUE; + warning("source file `%s' is empty", source); + } + } + close(fd); + samefile = FALSE; + nextfile++; + if (lexeme) + *lexeme = '\0'; + len = 0; + goto again; + } + lexptr = buf + SLOP; + lexend = lexptr + n; + return buf; +} + +/* tokadd --- add a character to the token buffer */ + +#define tokadd(x) (*tok++ = (x), tok == tokend ? 
tokexpand() : tok) + +/* tokexpand --- grow the token buffer */ + +char * +tokexpand() +{ + static int toksize = 60; + int tokoffset; + + tokoffset = tok - tokstart; + toksize *= 2; + if (tokstart != NULL) + erealloc(tokstart, char *, toksize, "tokexpand"); + else + emalloc(tokstart, char *, toksize, "tokexpand"); + tokend = tokstart + toksize; + tok = tokstart + tokoffset; + return tok; +} + +/* nextc --- get the next input character */ + +#if DEBUG +int +nextc() +{ + int c; + + if (lexptr && lexptr < lexend) + c = *lexptr++; + else if (get_src_buf()) + c = *lexptr++; + else + c = EOF; + + return c; +} +#else +#define nextc() ((lexptr && lexptr < lexend) ? \ + *lexptr++ : \ + (get_src_buf() ? *lexptr++ : EOF) \ + ) +#endif + +/* pushback --- push a character back on the input */ + +#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) + +/* allow_newline --- allow newline after &&, ||, ? and : */ + +static void +allow_newline() +{ + int c; + + for (;;) { + c = nextc(); + if (c == EOF) + break; + if (c == '#') { + while ((c = nextc()) != '\n' && c != EOF) + continue; + if (c == EOF) + break; + } + if (c == '\n') + sourceline++; + if (! isspace(c)) { + pushback(); + break; + } + } +} + +/* yylex --- Read the input and turn it into tokens. */ + +static int +yylex() +{ + register int c, c1; + int seen_e = FALSE; /* These are for numbers */ + int seen_point = FALSE; + int esc_seen; /* for literal strings */ + int low, mid, high; + static int did_newline = FALSE; + char *tokkey; + static int lasttok = 0, eof_warned = FALSE; + int inhex = FALSE; + + if (nextc() == EOF) { + if (lasttok != NEWLINE) { + lasttok = NEWLINE; + if (do_lint && ! eof_warned) { + warning("source file does not end in newline"); + eof_warned = TRUE; + } + return NEWLINE; /* fake it */ + } + return 0; + } + pushback(); +#ifdef OS2 + /* + * added for OS/2's extproc feature of cmd.exe + * (like #! 
in BSD sh) + */ + if (strncasecmp(lexptr, "extproc ", 8) == 0) { + while (*lexptr && *lexptr != '\n') + lexptr++; + } +#endif + lexeme = lexptr; + thisline = NULL; + if (want_regexp) { + int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ + /* + * Counting brackets is non-trivial. [[] is ok, + * and so is [\]], with a point being that /[/]/ as a regexp + * constant has to work. + * + * Do not count [ or ] if either one is preceded by a \. + * A `[' should be counted if + * a) it is the first one so far (in_brack == 0) + * b) it is the `[' in `[:' + * A ']' should be counted if not preceded by a \, since + * it is either closing `:]' or just a plain list. + * According to POSIX, []] is how you put a ] into a set. + * Try to handle that too. + * + * The code for \ handles \[ and \]. + */ + + want_regexp = FALSE; + tok = tokstart; + for (;;) { + c = nextc(); + switch (c) { + case '[': + /* one day check for `.' and `=' too */ + if ((c1 = nextc()) == ':' || in_brack == 0) + in_brack++; + pushback(); + break; + case ']': + if (tokstart[0] == '[' + && (tok == tokstart + 1 + || (tok == tokstart + 2 + && tokstart[1] == '^'))) + /* do nothing */; + else + in_brack--; + break; + case '\\': + if ((c = nextc()) == EOF) { + yyerror("unterminated regexp ends with \\ at end of file"); + return lasttok = REGEXP; /* kludge */ + } else if (c == '\n') { + sourceline++; + continue; + } else { + tokadd('\\'); + tokadd(c); + continue; + } + break; + case '/': /* end of the regexp */ + if (in_brack > 0) + break; + + pushback(); + tokadd('\0'); + yylval.sval = tokstart; + return lasttok = REGEXP; + case '\n': + pushback(); + yyerror("unterminated regexp"); + return lasttok = REGEXP; /* kludge */ + case EOF: + yyerror("unterminated regexp at end of file"); + return lasttok = REGEXP; /* kludge */ + } + tokadd(c); + } + } +retry: + while ((c = nextc()) == ' ' || c == '\t') + continue; + + lexeme = lexptr ? 
lexptr - 1 : lexptr; + thisline = NULL; + tok = tokstart; + yylval.nodetypeval = Node_illegal; + + switch (c) { + case EOF: + if (lasttok != NEWLINE) { + lasttok = NEWLINE; + if (do_lint && ! eof_warned) { + warning("source file does not end in newline"); + eof_warned = TRUE; + } + return NEWLINE; /* fake it */ + } + return 0; + + case '\n': + sourceline++; + return lasttok = NEWLINE; + + case '#': /* it's a comment */ + while ((c = nextc()) != '\n') { + if (c == EOF) { + if (lasttok != NEWLINE) { + lasttok = NEWLINE; + if (do_lint && ! eof_warned) { + warning( + "source file does not end in newline"); + eof_warned = TRUE; + } + return NEWLINE; /* fake it */ + } + return 0; + } + } + sourceline++; + return lasttok = NEWLINE; + + case '\\': +#ifdef RELAXED_CONTINUATION + /* + * This code puports to allow comments and/or whitespace + * after the `\' at the end of a line used for continuation. + * Use it at your own risk. We think it's a bad idea, which + * is why it's not on by default. + */ + if (! do_traditional) { + /* strip trailing white-space and/or comment */ + while ((c = nextc()) == ' ' || c == '\t') + continue; + if (c == '#') { + if (do_lint) + warning( + "use of `\\ #...' 
line continuation is not portable"); + while ((c = nextc()) != '\n') + if (c == EOF) + break; + } + pushback(); + } +#endif /* RELAXED_CONTINUATION */ + if (nextc() == '\n') { + sourceline++; + goto retry; + } else { + yyerror("backslash not last character on line"); + exit(1); + } + break; + + case '$': + want_assign = TRUE; + return lasttok = '$'; + + case ':': + case '?': + allow_newline(); + /* fall through */ + case ')': + case ']': + case '(': + case '[': + case ';': + case '{': + case ',': + return lasttok = c; + + case '*': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_times; + return lasttok = ASSIGNOP; + } else if (do_posix) { + pushback(); + return lasttok = '*'; + } else if (c == '*') { + /* make ** and **= aliases for ^ and ^= */ + static int did_warn_op = FALSE, did_warn_assgn = FALSE; + + if (nextc() == '=') { + if (do_lint && ! did_warn_assgn) { + did_warn_assgn = TRUE; + warning("**= is not allowed by POSIX"); + warning("operator `**=' is not supported in old awk"); + } + yylval.nodetypeval = Node_assign_exp; + return ASSIGNOP; + } else { + pushback(); + if (do_lint && ! did_warn_op) { + did_warn_op = TRUE; + warning("** is not allowed by POSIX"); + warning("operator `**' is not supported in old awk"); + } + return lasttok = '^'; + } + } + pushback(); + return lasttok = '*'; + + case '/': + if (want_assign) { + if (nextc() == '=') { + yylval.nodetypeval = Node_assign_quotient; + return lasttok = ASSIGNOP; + } + pushback(); + } + return lasttok = '/'; + + case '%': + if (nextc() == '=') { + yylval.nodetypeval = Node_assign_mod; + return lasttok = ASSIGNOP; + } + pushback(); + return lasttok = '%'; + + case '^': + { + static int did_warn_op = FALSE, did_warn_assgn = FALSE; + + if (nextc() == '=') { + if (do_lint && ! did_warn_assgn) { + did_warn_assgn = TRUE; + warning("operator `^=' is not supported in old awk"); + } + yylval.nodetypeval = Node_assign_exp; + return lasttok = ASSIGNOP; + } + pushback(); + if (do_lint && ! 
did_warn_op) { + did_warn_op = TRUE; + warning("operator `^' is not supported in old awk"); + } + return lasttok = '^'; + } + + case '+': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_plus; + return lasttok = ASSIGNOP; + } + if (c == '+') + return lasttok = INCREMENT; + pushback(); + return lasttok = '+'; + + case '!': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_notequal; + return lasttok = RELOP; + } + if (c == '~') { + yylval.nodetypeval = Node_nomatch; + want_assign = FALSE; + return lasttok = MATCHOP; + } + pushback(); + return lasttok = '!'; + + case '<': + if (nextc() == '=') { + yylval.nodetypeval = Node_leq; + return lasttok = RELOP; + } + yylval.nodetypeval = Node_less; + pushback(); + return lasttok = '<'; + + case '=': + if (nextc() == '=') { + yylval.nodetypeval = Node_equal; + return lasttok = RELOP; + } + yylval.nodetypeval = Node_assign; + pushback(); + return lasttok = ASSIGNOP; + + case '>': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_geq; + return lasttok = RELOP; + } else if (c == '>') { + yylval.nodetypeval = Node_redirect_append; + return lasttok = APPEND_OP; + } + yylval.nodetypeval = Node_greater; + pushback(); + return lasttok = '>'; + + case '~': + yylval.nodetypeval = Node_match; + want_assign = FALSE; + return lasttok = MATCHOP; + + case '}': + /* + * Added did newline stuff. Easier than + * hacking the grammar. 
+ */ + if (did_newline) { + did_newline = FALSE; + return lasttok = c; + } + did_newline++; + --lexptr; /* pick up } next time */ + return lasttok = NEWLINE; + + case '"': + esc_seen = FALSE; + while ((c = nextc()) != '"') { + if (c == '\n') { + pushback(); + yyerror("unterminated string"); + exit(1); + } + if (c == '\\') { + c = nextc(); + if (c == '\n') { + sourceline++; + continue; + } + esc_seen = TRUE; + tokadd('\\'); + } + if (c == EOF) { + pushback(); + yyerror("unterminated string"); + exit(1); + } + tokadd(c); + } + yylval.nodeval = make_str_node(tokstart, + tok - tokstart, esc_seen ? SCAN : 0); + yylval.nodeval->flags |= PERM; + return lasttok = YSTRING; + + case '-': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_minus; + return lasttok = ASSIGNOP; + } + if (c == '-') + return lasttok = DECREMENT; + pushback(); + return lasttok = '-'; + + case '.': + c = nextc(); + pushback(); + if (! isdigit(c)) + return lasttok = '.'; + else + c = '.'; + /* FALL THROUGH */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* It's a number */ + for (;;) { + int gotnumber = FALSE; + + tokadd(c); + switch (c) { +#ifdef BITOPS + case 'x': + case 'X': + if (do_traditional) + goto done; + if (tok == tokstart + 2) + inhex = TRUE; + break; +#endif /* BITOTS */ + case '.': + if (seen_point) { + gotnumber = TRUE; + break; + } + seen_point = TRUE; + break; + case 'e': + case 'E': + if (inhex) + break; + if (seen_e) { + gotnumber = TRUE; + break; + } + seen_e = TRUE; + if ((c = nextc()) == '-' || c == '+') + tokadd(c); + else + pushback(); + break; +#ifdef BITOPS + case 'a': + case 'A': + case 'b': + case 'B': + case 'c': + case 'C': + case 'D': + case 'd': + case 'f': + case 'F': + if (do_traditional || ! 
inhex) + goto done; + /* fall through */ +#endif + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + break; + default: + done: + gotnumber = TRUE; + } + if (gotnumber) + break; + c = nextc(); + } + if (c != EOF) + pushback(); + else if (do_lint && ! eof_warned) { + warning("source file does not end in newline"); + eof_warned = TRUE; + } + tokadd('\0'); +#ifdef BITOPS + if (! do_traditional && isnondecimal(tokstart)) + yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart))); + else +#endif /* BITOPS */ + yylval.nodeval = make_number(atof(tokstart)); + yylval.nodeval->flags |= PERM; + return lasttok = YNUMBER; + + case '&': + if ((c = nextc()) == '&') { + yylval.nodetypeval = Node_and; + allow_newline(); + want_assign = FALSE; + return lasttok = LEX_AND; + } + pushback(); + return lasttok = '&'; + + case '|': + if ((c = nextc()) == '|') { + yylval.nodetypeval = Node_or; + allow_newline(); + want_assign = FALSE; + return lasttok = LEX_OR; + } + pushback(); + return lasttok = '|'; + } + + if (c != '_' && ! isalpha(c)) { + yyerror("Invalid char '%c' in expression\n", c); + exit(1); + } + + /* it's some type of name-type-thing. Find its length. */ + tok = tokstart; + while (is_identchar(c)) { + tokadd(c); + c = nextc(); + } + tokadd('\0'); + emalloc(tokkey, char *, tok - tokstart, "yylex"); + memcpy(tokkey, tokstart, tok - tokstart); + if (c != EOF) + pushback(); + else if (do_lint && ! eof_warned) { + warning("source file does not end in newline"); + eof_warned = TRUE; + } + + /* See if it is a special token. */ + low = 0; + high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; + while (low <= high) { + int i; + + mid = (low + high) / 2; + c = *tokstart - tokentab[mid].operator[0]; + i = c ? 
c : strcmp(tokstart, tokentab[mid].operator); + + if (i < 0) /* token < mid */ + high = mid - 1; + else if (i > 0) /* token > mid */ + low = mid + 1; + else { + if (do_lint) { + if (tokentab[mid].flags & GAWKX) + warning("%s() is a gawk extension", + tokentab[mid].operator); + if (tokentab[mid].flags & RESX) + warning("%s() is a Bell Labs extension", + tokentab[mid].operator); + if (tokentab[mid].flags & NOT_POSIX) + warning("POSIX does not allow %s", + tokentab[mid].operator); + } + if (do_lint_old && (tokentab[mid].flags & NOT_OLD)) + warning("%s is not supported in old awk", + tokentab[mid].operator); + if ((do_traditional && (tokentab[mid].flags & GAWKX)) + || (do_posix && (tokentab[mid].flags & NOT_POSIX))) + break; + if (tokentab[mid].class == LEX_BUILTIN + || tokentab[mid].class == LEX_LENGTH + ) + yylval.lval = mid; + else + yylval.nodetypeval = tokentab[mid].value; + + free(tokkey); + return lasttok = tokentab[mid].class; + } + } + + yylval.sval = tokkey; + if (*lexptr == '(') + return lasttok = FUNC_CALL; + else { + want_assign = TRUE; + return lasttok = NAME; + } +} + +/* node_common --- common code for allocating a new node */ + +static NODE * +node_common(op) +NODETYPE op; +{ + register NODE *r; + + getnode(r); + r->type = op; + r->flags = MALLOC; + /* if lookahead is NL, lineno is 1 too high */ + if (lexeme && *lexeme == '\n') + r->source_line = sourceline - 1; + else + r->source_line = sourceline; + r->source_file = source; + return r; +} + +/* node --- allocates a node with defined lnode and rnode. */ + +NODE * +node(left, op, right) +NODE *left, *right; +NODETYPE op; +{ + register NODE *r; + + r = node_common(op); + r->lnode = left; + r->rnode = right; + return r; +} + +/* snode --- allocate a node with defined subnode and proc for builtin + functions. Checks for arg. count and supplies defaults where + possible. 
*/ + +static NODE * +snode(subn, op, idx) +NODETYPE op; +int idx; +NODE *subn; +{ + register NODE *r; + register NODE *n; + int nexp = 0; + int args_allowed; + + r = node_common(op); + + /* traverse expression list to see how many args. given */ + for (n = subn; n != NULL; n = n->rnode) { + nexp++; + if (nexp > 3) + break; + } + + /* check against how many args. are allowed for this builtin */ + args_allowed = tokentab[idx].flags & ARGS; + if (args_allowed && (args_allowed & A(nexp)) == 0) + fatal("%s() cannot have %d argument%c", + tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's'); + + r->proc = tokentab[idx].ptr; + + /* special case processing for a few builtins */ + /* + * FIXME: go through these to make sure that everything done + * here is really right. Move anything that's not into + * the corresponding routine. + */ + if (nexp == 0 && r->proc == do_length) { + subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), + Node_expression_list, + (NODE *) NULL); + } else if (r->proc == do_match) { + if (subn->rnode->lnode->type != Node_regex) + subn->rnode->lnode = mk_rexp(subn->rnode->lnode); + } else if (r->proc == do_sub || r->proc == do_gsub) { + if (subn->lnode->type != Node_regex) + subn->lnode = mk_rexp(subn->lnode); + if (nexp == 2) + append_right(subn, node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL)); + else if (subn->rnode->rnode->lnode->type == Node_val) { + if (do_lint) + warning("string literal as last arg of substitute"); + } else if (! isassignable(subn->rnode->rnode->lnode)) + yyerror("%s third parameter is not a changeable object", + r->proc == do_sub ? 
"sub" : "gsub"); + } else if (r->proc == do_gensub) { + if (subn->lnode->type != Node_regex) + subn->lnode = mk_rexp(subn->lnode); + if (nexp == 3) + append_right(subn, node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL)); + } else if (r->proc == do_split) { + if (nexp == 2) + append_right(subn, + node(FS_node, Node_expression_list, (NODE *) NULL)); + n = subn->rnode->rnode->lnode; + if (n->type != Node_regex) + subn->rnode->rnode->lnode = mk_rexp(n); + if (nexp == 2) + subn->rnode->rnode->lnode->re_flags |= FS_DFLT; + } + + r->subnode = subn; + return r; +} + +/* + * mkrangenode: + * This allocates a Node_line_range node with defined condpair and + * zeroes the trigger word to avoid the temptation of assuming that calling + * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. + * Otherwise like node(). + */ + +static NODE * +mkrangenode(cpair) +NODE *cpair; +{ + register NODE *r; + + getnode(r); + r->type = Node_line_range; + r->condpair = cpair; + r->triggered = FALSE; + return r; +} + +/* make_for_loop --- build a for loop */ + +static NODE * +make_for_loop(init, cond, incr) +NODE *init, *cond, *incr; +{ + register FOR_LOOP_HEADER *r; + NODE *n; + + emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); + getnode(n); + n->type = Node_illegal; + r->init = init; + r->cond = cond; + r->incr = incr; + n->sub.nodep.r.hd = r; + return n; +} + +/* dup_parms --- return TRUE if there are duplicate parameters */ + +static int +dup_parms(func) +NODE *func; +{ + register NODE *np; + char *fname, **names; + int count, i, j, dups; + NODE *params; + + if (func == NULL) /* error earlier */ + return TRUE; + + fname = func->param; + count = func->param_cnt; + params = func->rnode; + + if (count == 0) /* no args, no problem */ + return FALSE; + + if (params == NULL) /* error earlier */ + return TRUE; + + emalloc(names, char **, count * sizeof(char *), "dup_parms"); + + i = 0; + for (np = 
params; np != NULL; np = np->rnode) { + if (np->param == NULL) { /* error earlier, give up, go home */ + free(names); + return TRUE; + } + names[i++] = np->param; + } + + dups = 0; + for (i = 1; i < count; i++) { + for (j = 0; j < i; j++) { + if (strcmp(names[i], names[j]) == 0) { + dups++; + error( + "function `%s': parameter #%d, `%s', duplicates parameter #%d", + fname, i+1, names[j], j+1); + } + } + } + + free(names); + return (dups > 0 ? TRUE : FALSE); +} + +/* + * install: + * Install a name in the symbol table, even if it is already there. + * Caller must check against redefinition if that is desired. + */ + +NODE * +install(name, value) +char *name; +NODE *value; +{ + register NODE *hp; + register size_t len; + register int bucket; + + len = strlen(name); + bucket = hash(name, len, (unsigned long) HASHSIZE); + getnode(hp); + hp->type = Node_hashnode; + hp->hnext = variables[bucket]; + variables[bucket] = hp; + hp->hlength = len; + hp->hvalue = value; + hp->hname = name; + hp->hvalue->vname = name; + return hp->hvalue; +} + +/* lookup --- find the most recent hash node for name installed by install */ + +NODE * +lookup(name) +const char *name; +{ + register NODE *bucket; + register size_t len; + + len = strlen(name); + for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)]; + bucket != NULL; bucket = bucket->hnext) + if (bucket->hlength == len && STREQN(bucket->hname, name, len)) + return bucket->hvalue; + + return NULL; +} + +/* + * append_right: + * Add new to the rightmost branch of LIST. This uses n^2 time, so we make + * a simple attempt at optimizing it. 
+ */ + +static NODE * +append_right(list, new) +NODE *list, *new; +{ + register NODE *oldlist; + static NODE *savefront = NULL, *savetail = NULL; + + oldlist = list; + if (savefront == oldlist) { + savetail = savetail->rnode = new; + return oldlist; + } else + savefront = oldlist; + while (list->rnode != NULL) + list = list->rnode; + savetail = list->rnode = new; + return oldlist; +} + +/* + * func_install: + * check if name is already installed; if so, it had better have Null value, + * in which case def is added as the value. Otherwise, install name with def + * as value. + */ + +static void +func_install(params, def) +NODE *params; +NODE *def; +{ + NODE *r; + + pop_params(params->rnode); + pop_var(params, FALSE); + r = lookup(params->param); + if (r != NULL) { + fatal("function name `%s' previously defined", params->param); + } else + (void) install(params->param, node(params, Node_func, def)); + + func_use(params->param, FUNC_DEFINE); +} + +/* pop_var --- remove a variable from the symbol table */ + +static void +pop_var(np, freeit) +NODE *np; +int freeit; +{ + register NODE *bucket, **save; + register size_t len; + char *name; + + name = np->param; + len = strlen(name); + save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]); + for (bucket = *save; bucket != NULL; bucket = bucket->hnext) { + if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { + *save = bucket->hnext; + freenode(bucket); + if (freeit) + free(np->param); + return; + } + save = &(bucket->hnext); + } +} + +/* pop_params --- remove list of function parameters from symbol table */ + +/* + * pop parameters out of the symbol table. do this in reverse order to + * avoid reading freed memory if there were duplicated parameters. 
+ */ +static void +pop_params(params) +NODE *params; +{ + if (params == NULL) + return; + pop_params(params->rnode); + pop_var(params, TRUE); +} + +/* make_param --- make NAME into a function parameter */ + +static NODE * +make_param(name) +char *name; +{ + NODE *r; + + getnode(r); + r->type = Node_param_list; + r->rnode = NULL; + r->param = name; + r->param_cnt = param_counter++; + return (install(name, r)); +} + +static struct fdesc { + char *name; + short used; + short defined; + struct fdesc *next; +} *ftable[HASHSIZE]; + +/* func_use --- track uses and definitions of functions */ + +static void +func_use(name, how) +char *name; +enum defref how; +{ + struct fdesc *fp; + int len; + int ind; + + len = strlen(name); + ind = hash(name, len, HASHSIZE); + + for (fp = ftable[ind]; fp != NULL; fp = fp->next) { + if (strcmp(fp->name, name) == 0) { + if (how == FUNC_DEFINE) + fp->defined++; + else + fp->used++; + return; + } + } + + /* not in the table, fall through to allocate a new one */ + + emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use"); + memset(fp, '\0', sizeof(struct fdesc)); + emalloc(fp->name, char *, len + 1, "func_use"); + strcpy(fp->name, name); + if (how == FUNC_DEFINE) + fp->defined++; + else + fp->used++; + fp->next = ftable[ind]; + ftable[ind] = fp; +} + +/* check_funcs --- verify functions that are called but not defined */ + +static void +check_funcs() +{ + struct fdesc *fp, *next; + int i; + + for (i = 0; i < HASHSIZE; i++) { + for (fp = ftable[i]; fp != NULL; fp = fp->next) { +#ifdef REALLYMEAN + /* making this the default breaks old code. sigh. 
*/ + if (fp->defined == 0) { + error( + "function `%s' called but never defined", fp->name); + errcount++; + } +#else + if (do_lint && fp->defined == 0) + warning( + "function `%s' called but never defined", fp->name); +#endif + if (do_lint && fp->used == 0) { + warning("function `%s' defined but never called", + fp->name); + } + } + } + + /* now let's free all the memory */ + for (i = 0; i < HASHSIZE; i++) { + for (fp = ftable[i]; fp != NULL; fp = next) { + next = fp->next; + free(fp->name); + free(fp); + } + } +} + +/* param_sanity --- look for parameters that are regexp constants */ + +static void +param_sanity(arglist) +NODE *arglist; +{ + NODE *argp, *arg; + int i; + + for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) { + arg = argp->lnode; + if (arg->type == Node_regex) + warning("regexp constant for parameter #%d yields boolean value", i); + } +} + +/* variable --- make sure NAME is in the symbol table */ + +NODE * +variable(name, can_free, type) +char *name; +int can_free; +NODETYPE type; +{ + register NODE *r; + static int env_loaded = FALSE; + + if (! env_loaded && STREQ(name, "ENVIRON")) { + load_environ(); + env_loaded = TRUE; + } + if ((r = lookup(name)) == NULL) + r = install(name, node(Nnull_string, type, (NODE *) NULL)); + else if (can_free) + free(name); + return r; +} + +/* mk_rexp --- make a regular expression constant */ + +static NODE * +mk_rexp(exp) +NODE *exp; +{ + NODE *n; + + if (exp->type == Node_regex) + return exp; + + getnode(n); + n->type = Node_regex; + n->re_exp = exp; + n->re_text = NULL; + n->re_reg = NULL; + n->re_flags = 0; + n->re_cnt = 1; + return n; +} + +/* isnoeffect --- when used as a statement, has no side effects */ + +/* + * To be completely general, we should recursively walk the parse + * tree, to make sure that all the subexpressions also have no effect. + * Instead, we just weaken the actual warning that's printed, up above + * in the grammar. 
+ */ + +static int +isnoeffect(type) +NODETYPE type; +{ + switch (type) { + case Node_times: + case Node_quotient: + case Node_mod: + case Node_plus: + case Node_minus: + case Node_subscript: + case Node_concat: + case Node_exp: + case Node_unary_minus: + case Node_field_spec: + case Node_and: + case Node_or: + case Node_equal: + case Node_notequal: + case Node_less: + case Node_greater: + case Node_leq: + case Node_geq: + case Node_match: + case Node_nomatch: + case Node_not: + case Node_val: + case Node_in_array: + case Node_NF: + case Node_NR: + case Node_FNR: + case Node_FS: + case Node_RS: + case Node_FIELDWIDTHS: + case Node_IGNORECASE: + case Node_OFS: + case Node_ORS: + case Node_OFMT: + case Node_CONVFMT: + return TRUE; + default: + break; /* keeps gcc -Wall happy */ + } + + return FALSE; +} + +/* isassignable --- can this node be assigned to? */ + +static int +isassignable(n) +register NODE *n; +{ + switch (n->type) { + case Node_var: + case Node_FIELDWIDTHS: + case Node_RS: + case Node_FS: + case Node_FNR: + case Node_NR: + case Node_NF: + case Node_IGNORECASE: + case Node_OFMT: + case Node_CONVFMT: + case Node_ORS: + case Node_OFS: + case Node_field_spec: + case Node_subscript: + return TRUE; + case Node_param_list: + return ((n->flags & FUNC) == 0); /* ok if not func name */ + default: + break; /* keeps gcc -Wall happy */ + } + return FALSE; +} diff --git a/contrib/awk/builtin.c b/contrib/awk/builtin.c new file mode 100644 index 0000000..0686041 --- /dev/null +++ b/contrib/awk/builtin.c @@ -0,0 +1,2048 @@ +/* + * builtin.c - Builtin functions and various utility procedures + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. 
+ * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +#include "awk.h" +#include +#undef HUGE +#undef CHARBITS +#undef INTBITS +#include +#include "random.h" + +/* can declare these, since we always use the random shipped with gawk */ +extern char *initstate P((unsigned seed, char *state, int n)); +extern char *setstate P((char *state)); +extern long random P((void)); +extern void srandom P((unsigned int seed)); + +extern NODE **fields_arr; +extern int output_is_tty; + +static NODE *sub_common P((NODE *tree, int how_many, int backdigs)); +NODE *format_tree P((const char *, int, NODE *)); + +#ifdef _CRAY +/* Work around a problem in conversion of doubles to exact integers. */ +#include +#define Floor(n) floor((n) * (1.0 + DBL_EPSILON)) +#define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON)) + +/* Force the standard C compiler to use the library math functions. 
*/ +extern double exp(double); +double (*Exp)() = exp; +#define exp(x) (*Exp)(x) +extern double log(double); +double (*Log)() = log; +#define log(x) (*Log)(x) +#else +#define Floor(n) floor(n) +#define Ceil(n) ceil(n) +#endif + +#define DEFAULT_G_PRECISION 6 + +#ifdef GFMT_WORKAROUND +/* semi-temporary hack, mostly to gracefully handle VMS */ +static void sgfmt P((char *buf, const char *format, int alt, + int fwidth, int precision, double value)); +#endif /* GFMT_WORKAROUND */ + +/* + * Since we supply the version of random(), we know what + * value to use here. + */ +#define GAWK_RANDOM_MAX 0x7fffffffL + +static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp, + const char *from, struct redirect *rp, int flush)); + +/* efwrite --- like fwrite, but with error checking */ + +static void +efwrite(ptr, size, count, fp, from, rp, flush) +const void *ptr; +size_t size, count; +FILE *fp; +const char *from; +struct redirect *rp; +int flush; +{ + errno = 0; + if (fwrite(ptr, size, count, fp) != count) + goto wrerror; + if (flush + && ((fp == stdout && output_is_tty) + || (rp && (rp->flag & RED_NOBUF)))) { + fflush(fp); + if (ferror(fp)) + goto wrerror; + } + return; + +wrerror: + fatal("%s to \"%s\" failed (%s)", from, + rp ? rp->value : "standard output", + errno ? strerror(errno) : "reason unknown"); +} + +/* do_exp --- exponential function */ + +NODE * +do_exp(tree) +NODE *tree; +{ + NODE *tmp; + double d, res; + + tmp = tree_eval(tree->lnode); + d = force_number(tmp); + free_temp(tmp); + errno = 0; + res = exp(d); + if (errno == ERANGE) + warning("exp argument %g is out of range", d); + return tmp_number((AWKNUM) res); +} + +/* stdfile --- return fp for a standard file */ + +/* + * This function allows `fflush("/dev/stdout")' to work. + * The other files will be available via getredirect(). + * /dev/stdin is not included, since fflush is only for output. 
+ */ + +static FILE * +stdfile(name, len) +char *name; +size_t len; +{ + if (len == 11) { + if (STREQN(name, "/dev/stderr", 11)) + return stderr; + else if (STREQN(name, "/dev/stdout", 11)) + return stdout; + } + + return NULL; +} + +/* do_fflush --- flush output, either named file or pipe or everything */ + +NODE * +do_fflush(tree) +NODE *tree; +{ + struct redirect *rp; + NODE *tmp; + FILE *fp; + int status = 0; + char *file; + + /* fflush() --- flush stdout */ + if (tree == NULL) { + status = fflush(stdout); + return tmp_number((AWKNUM) status); + } + + tmp = tree_eval(tree->lnode); + tmp = force_string(tmp); + file = tmp->stptr; + + /* fflush("") --- flush all */ + if (tmp->stlen == 0) { + status = flush_io(); + free_temp(tmp); + return tmp_number((AWKNUM) status); + } + + rp = getredirect(tmp->stptr, tmp->stlen); + status = 1; + if (rp != NULL) { + if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) { + /* if (do_lint) */ + warning( + "fflush: cannot flush: %s `%s' opened for reading, not writing", + (rp->flag & RED_PIPE) ? 
"pipe" : "file", + file); + free_temp(tmp); + return tmp_number((AWKNUM) status); + } + fp = rp->fp; + if (fp != NULL) + status = fflush(fp); + } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) { + status = fflush(fp); + } else + warning("fflush: `%s' is not an open file or pipe", file); + free_temp(tmp); + return tmp_number((AWKNUM) status); +} + +/* do_index --- find index of a string */ + +NODE * +do_index(tree) +NODE *tree; +{ + NODE *s1, *s2; + register char *p1, *p2; + register size_t l1, l2; + long ret; + + + s1 = tree_eval(tree->lnode); + s2 = tree_eval(tree->rnode->lnode); + force_string(s1); + force_string(s2); + p1 = s1->stptr; + p2 = s2->stptr; + l1 = s1->stlen; + l2 = s2->stlen; + ret = 0; + + /* IGNORECASE will already be false if posix */ + if (IGNORECASE) { + while (l1 > 0) { + if (l2 > l1) + break; + if (casetable[(int)*p1] == casetable[(int)*p2] + && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { + ret = 1 + s1->stlen - l1; + break; + } + l1--; + p1++; + } + } else { + while (l1 > 0) { + if (l2 > l1) + break; + if (*p1 == *p2 + && (l2 == 1 || STREQN(p1, p2, l2))) { + ret = 1 + s1->stlen - l1; + break; + } + l1--; + p1++; + } + } + free_temp(s1); + free_temp(s2); + return tmp_number((AWKNUM) ret); +} + +/* double_to_int --- convert double to int, used several places */ + +double +double_to_int(d) +double d; +{ + if (d >= 0) + d = Floor(d); + else + d = Ceil(d); + return d; +} + +/* do_int --- convert double to int for awk */ + +NODE * +do_int(tree) +NODE *tree; +{ + NODE *tmp; + double d; + + tmp = tree_eval(tree->lnode); + d = force_number(tmp); + d = double_to_int(d); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +/* do_length --- length of a string or $0 */ + +NODE * +do_length(tree) +NODE *tree; +{ + NODE *tmp; + size_t len; + + tmp = tree_eval(tree->lnode); + len = force_string(tmp)->stlen; + free_temp(tmp); + return tmp_number((AWKNUM) len); +} + +/* do_log --- the log function */ + +NODE * +do_log(tree) +NODE *tree; +{ + 
NODE *tmp; + double d, arg; + + tmp = tree_eval(tree->lnode); + arg = (double) force_number(tmp); + if (arg < 0.0) + warning("log called with negative argument %g", arg); + d = log(arg); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +/* + * format_tree() formats nodes of a tree, starting with a left node, + * and accordingly to a fmt_string providing a format like in + * printf family from C library. Returns a string node which value + * is a formatted string. Called by sprintf function. + * + * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann + * for taming this beast and making it compatible with ANSI C. + */ + +NODE * +format_tree(fmt_string, n0, carg) +const char *fmt_string; +int n0; +register NODE *carg; +{ +/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */ +/* difference of pointers should be of ptrdiff_t type, but let us be kind */ +#define bchunk(s, l) if (l) { \ + while ((l) > ofre) { \ + long olen = obufout - obuf; \ + erealloc(obuf, char *, osiz * 2, "format_tree"); \ + ofre += osiz; \ + osiz *= 2; \ + obufout = obuf + olen; \ + } \ + memcpy(obufout, s, (size_t) (l)); \ + obufout += (l); \ + ofre -= (l); \ +} + +/* copy one byte from 's' to 'obufout' checking for space in the process */ +#define bchunk_one(s) { \ + if (ofre <= 0) { \ + long olen = obufout - obuf; \ + erealloc(obuf, char *, osiz * 2, "format_tree"); \ + ofre += osiz; \ + osiz *= 2; \ + obufout = obuf + olen; \ + } \ + *obufout++ = *s; \ + --ofre; \ +} + +/* Is there space for something L big in the buffer? */ +#define chksize(l) if ((l) > ofre) { \ + long olen = obufout - obuf; \ + erealloc(obuf, char *, osiz * 2, "format_tree"); \ + obufout = obuf + olen; \ + ofre += osiz; \ + osiz *= 2; \ +} + +/* + * Get the next arg to be formatted. 
If we've run out of args, + * return "" (Null string) + */ +#define parse_next_arg() { \ + if (carg == NULL) { \ + toofew = TRUE; \ + break; \ + } else { \ + arg = tree_eval(carg->lnode); \ + carg = carg->rnode; \ + } \ +} + + NODE *r; + int toofew = FALSE; + char *obuf, *obufout; + size_t osiz, ofre; + char *chbuf; + const char *s0, *s1; + int cs1; + NODE *arg; + long fw, prec; + int lj, alt, big, bigbig, small, have_prec, need_format; + long *cur = NULL; +#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */ + long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */ +#endif + unsigned long uval; + int sgn; + int base = 0; + char cpbuf[30]; /* if we have numbers bigger than 30 */ + char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */ + char *cp; + char *fill; + double tmpval; + char signchar = FALSE; + size_t len; + static char sp[] = " "; + static char zero_string[] = "0"; + static char lchbuf[] = "0123456789abcdef"; + static char Uchbuf[] = "0123456789ABCDEF"; + +#define INITIAL_OUT_SIZE 512 + emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree"); + obufout = obuf; + osiz = INITIAL_OUT_SIZE; + ofre = osiz - 1; + + need_format = FALSE; + + s0 = s1 = fmt_string; + while (n0-- > 0) { + if (*s1 != '%') { + s1++; + continue; + } + need_format = TRUE; + bchunk(s0, s1 - s0); + s0 = s1; + cur = &fw; + fw = 0; + prec = 0; + have_prec = FALSE; + signchar = FALSE; + lj = alt = big = bigbig = small = FALSE; + fill = sp; + cp = cend; + chbuf = lchbuf; + s1++; + +retry: + if (n0-- <= 0) /* ran out early! 
*/ + break; + + switch (cs1 = *s1++) { + case (-1): /* dummy case to allow for checking */ +check_pos: + if (cur != &fw) + break; /* reject as a valid format */ + goto retry; + case '%': + need_format = FALSE; + bchunk_one("%"); + s0 = s1; + break; + + case '0': + if (lj) + goto retry; + if (cur == &fw) + fill = zero_string; + /* FALL through */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (cur == NULL) + break; + if (prec >= 0) + *cur = cs1 - '0'; + /* + * with a negative precision *cur is already set + * to -1, so it will remain negative, but we have + * to "eat" precision digits in any case + */ + while (n0 > 0 && *s1 >= '0' && *s1 <= '9') { + --n0; + *cur = *cur * 10 + *s1++ - '0'; + } + if (prec < 0) /* negative precision is discarded */ + have_prec = FALSE; + if (cur == &prec) + cur = NULL; + if (n0 == 0) /* badly formatted control string */ + continue; + goto retry; + case '*': + if (cur == NULL) + break; + parse_next_arg(); + *cur = force_number(arg); + free_temp(arg); + if (*cur < 0 && cur == &fw) { + *cur = -*cur; + lj++; + } + if (cur == &prec) { + if (*cur >= 0) + have_prec = TRUE; + else + have_prec = FALSE; + cur = NULL; + } + goto retry; + case ' ': /* print ' ' or '-' */ + /* 'space' flag is ignored */ + /* if '+' already present */ + if (signchar != FALSE) + goto check_pos; + /* FALL THROUGH */ + case '+': /* print '+' or '-' */ + signchar = cs1; + goto check_pos; + case '-': + if (prec < 0) + break; + if (cur == &prec) { + prec = -1; + goto retry; + } + fill = sp; /* if left justified then other */ + lj++; /* filling is ignored */ + goto check_pos; + case '.': + if (cur != &fw) + break; + cur = ≺ + have_prec = TRUE; + goto retry; + case '#': + alt = TRUE; + goto check_pos; + case 'l': + if (big) + break; + else { + static int warned = FALSE; + + if (do_lint && ! 
warned) { + warning("`l' is meaningless in awk formats; ignored"); + warned = TRUE; + } + if (do_posix) + fatal("'l' is not permitted in POSIX awk formats"); + } + big = TRUE; + goto retry; + case 'L': + if (bigbig) + break; + else { + static int warned = FALSE; + + if (do_lint && ! warned) { + warning("`L' is meaningless in awk formats; ignored"); + warned = TRUE; + } + if (do_posix) + fatal("'L' is not permitted in POSIX awk formats"); + } + bigbig = TRUE; + goto retry; + case 'h': + if (small) + break; + else { + static int warned = FALSE; + + if (do_lint && ! warned) { + warning("`h' is meaningless in awk formats; ignored"); + warned = TRUE; + } + if (do_posix) + fatal("'h' is not permitted in POSIX awk formats"); + } + small = TRUE; + goto retry; + case 'c': + need_format = FALSE; + parse_next_arg(); + /* user input that looks numeric is numeric */ + if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM) + (void) force_number(arg); + if (arg->flags & NUMBER) { +#ifdef sun386 + tmp_uval = arg->numbr; + uval = (unsigned long) tmp_uval; +#else + uval = (unsigned long) arg->numbr; +#endif + cpbuf[0] = uval; + prec = 1; + cp = cpbuf; + goto pr_tail; + } + if (have_prec == FALSE) + prec = 1; + else if (prec > arg->stlen) + prec = arg->stlen; + cp = arg->stptr; + goto pr_tail; + case 's': + need_format = FALSE; + parse_next_arg(); + arg = force_string(arg); + if (! have_prec || prec > arg->stlen) + prec = arg->stlen; + cp = arg->stptr; + goto pr_tail; + case 'd': + case 'i': + need_format = FALSE; + parse_next_arg(); + tmpval = force_number(arg); + if (tmpval < 0) { + if (tmpval < LONG_MIN) + goto out_of_range; + sgn = TRUE; + uval = - (unsigned long) (long) tmpval; + } else { + /* Use !, so that NaNs are out of range. + The cast avoids a SunOS 4.1.x cc bug. */ + if (! 
(tmpval <= (unsigned long) ULONG_MAX)) + goto out_of_range; + sgn = FALSE; + uval = (unsigned long) tmpval; + } + do { + *--cp = (char) ('0' + uval % 10); + uval /= 10; + } while (uval > 0); + if (sgn) + *--cp = '-'; + else if (signchar) + *--cp = signchar; + /* + * precision overrides '0' flags. however, for + * integer formats, precsion is minimum number of + * *digits*, not characters, thus we want to fill + * with zeroes. + */ + if (have_prec) + fill = zero_string; + if (prec > fw) + fw = prec; + prec = cend - cp; + if (fw > prec && ! lj && fill != sp + && (*cp == '-' || signchar)) { + bchunk_one(cp); + cp++; + prec--; + fw--; + } + goto pr_tail; + case 'X': + chbuf = Uchbuf; /* FALL THROUGH */ + case 'x': + base += 6; /* FALL THROUGH */ + case 'u': + base += 2; /* FALL THROUGH */ + case 'o': + base += 8; + need_format = FALSE; + parse_next_arg(); + tmpval = force_number(arg); + if (tmpval < 0) { + if (tmpval < LONG_MIN) + goto out_of_range; + uval = (unsigned long) (long) tmpval; + } else { + /* Use !, so that NaNs are out of range. + The cast avoids a SunOS 4.1.x cc bug. */ + if (! (tmpval <= (unsigned long) ULONG_MAX)) + goto out_of_range; + uval = (unsigned long) tmpval; + } + /* + * precision overrides '0' flags. however, for + * integer formats, precsion is minimum number of + * *digits*, not characters, thus we want to fill + * with zeroes. + */ + if (have_prec) + fill = zero_string; + do { + *--cp = chbuf[uval % base]; + uval /= base; + } while (uval > 0); + if (alt) { + if (base == 16) { + *--cp = cs1; + *--cp = '0'; + if (fill != sp) { + bchunk(cp, 2); + cp += 2; + fw -= 2; + } + } else if (base == 8) + *--cp = '0'; + } + base = 0; + if (prec > fw) + fw = prec; + prec = cend - cp; + pr_tail: + if (! 
lj) { + while (fw > prec) { + bchunk_one(fill); + fw--; + } + } + bchunk(cp, (int) prec); + while (fw > prec) { + bchunk_one(fill); + fw--; + } + s0 = s1; + free_temp(arg); + break; + + out_of_range: + /* out of range - emergency use of %g format */ + cs1 = 'g'; + goto format_float; + + case 'g': + case 'G': + case 'e': + case 'f': + case 'E': + need_format = FALSE; + parse_next_arg(); + tmpval = force_number(arg); + format_float: + free_temp(arg); + if (! have_prec) + prec = DEFAULT_G_PRECISION; + chksize(fw + prec + 9); /* 9 == slop */ + + cp = cpbuf; + *cp++ = '%'; + if (lj) + *cp++ = '-'; + if (signchar) + *cp++ = signchar; + if (alt) + *cp++ = '#'; + if (fill != sp) + *cp++ = '0'; + cp = strcpy(cp, "*.*") + 3; + *cp++ = cs1; + *cp = '\0'; +#ifndef GFMT_WORKAROUND + (void) sprintf(obufout, cpbuf, + (int) fw, (int) prec, (double) tmpval); +#else /* GFMT_WORKAROUND */ + if (cs1 == 'g' || cs1 == 'G') + sgfmt(obufout, cpbuf, (int) alt, + (int) fw, (int) prec, (double) tmpval); + else + (void) sprintf(obufout, cpbuf, + (int) fw, (int) prec, (double) tmpval); +#endif /* GFMT_WORKAROUND */ + len = strlen(obufout); + ofre -= len; + obufout += len; + s0 = s1; + break; + default: + break; + } + if (toofew) + fatal("%s\n\t`%s'\n\t%*s%s", + "not enough arguments to satisfy format string", + fmt_string, s1 - fmt_string - 2, "", + "^ ran out for this one" + ); + } + if (do_lint) { + if (need_format) + warning( + "printf format specifier does not have control letter"); + if (carg != NULL) + warning( + "too many arguments supplied for format string"); + } + bchunk(s0, s1 - s0); + r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED); + r->flags |= TEMP; + return r; +} + +/* do_sprintf --- perform sprintf */ + +NODE * +do_sprintf(tree) +NODE *tree; +{ + NODE *r; + NODE *sfmt = force_string(tree_eval(tree->lnode)); + + r = format_tree(sfmt->stptr, sfmt->stlen, tree->rnode); + free_temp(sfmt); + return r; +} + +/* do_printf --- perform printf, including redirection */ + +void 
+do_printf(tree) +register NODE *tree; +{ + struct redirect *rp = NULL; + register FILE *fp; + + if (tree->lnode == NULL) { + if (do_traditional) { + if (do_lint) + warning("printf: no arguments"); + return; /* bwk accepts it silently */ + } + fatal("printf: no arguments"); + } + + if (tree->rnode != NULL) { + int errflg; /* not used, sigh */ + + rp = redirect(tree->rnode, &errflg); + if (rp != NULL) { + fp = rp->fp; + if (fp == NULL) + return; + } else + return; + } else + fp = stdout; + tree = do_sprintf(tree->lnode); + efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp, TRUE); + free_temp(tree); +} + +/* do_sqrt --- do the sqrt function */ + +NODE * +do_sqrt(tree) +NODE *tree; +{ + NODE *tmp; + double arg; + + tmp = tree_eval(tree->lnode); + arg = (double) force_number(tmp); + free_temp(tmp); + if (arg < 0.0) + warning("sqrt called with negative argument %g", arg); + return tmp_number((AWKNUM) sqrt(arg)); +} + +/* do_substr --- do the substr function */ + +NODE * +do_substr(tree) +NODE *tree; +{ + NODE *t1, *t2, *t3; + NODE *r; + register size_t indx; + size_t length; + double d_index, d_length; + + t1 = force_string(tree_eval(tree->lnode)); + t2 = tree_eval(tree->rnode->lnode); + d_index = force_number(t2); + free_temp(t2); + + if (d_index < 1.0) { + if (do_lint) + warning("substr: start index %g invalid, using 1", + d_index); + d_index = 1; + } + if (do_lint && double_to_int(d_index) != d_index) + warning("substr: non-integer start index %g will be truncated", + d_index); + + indx = d_index - 1; /* awk indices are from 1, C's are from 0 */ + + if (tree->rnode->rnode == NULL) { /* third arg. 
missing */ + /* use remainder of string */ + length = t1->stlen - indx; + } else { + t3 = tree_eval(tree->rnode->rnode->lnode); + d_length = force_number(t3); + free_temp(t3); + if (d_length <= 0.0) { + if (do_lint) + warning("substr: length %g is <= 0", d_length); + free_temp(t1); + return Nnull_string; + } + if (do_lint && double_to_int(d_length) != d_length) + warning( + "substr: non-integer length %g will be truncated", + d_length); + length = d_length; + } + + if (t1->stlen == 0) { + if (do_lint) + warning("substr: source string is zero length"); + free_temp(t1); + return Nnull_string; + } + if ((indx + length) > t1->stlen) { + if (do_lint) + warning( + "substr: length %d at position %d exceeds length of first argument (%d)", + length, indx+1, t1->stlen); + length = t1->stlen - indx; + } + if (indx >= t1->stlen) { + if (do_lint) + warning("substr: start index %d is past end of string", + indx+1); + free_temp(t1); + return Nnull_string; + } + r = tmp_string(t1->stptr + indx, length); + free_temp(t1); + return r; +} + +/* do_strftime --- format a time stamp */ + +NODE * +do_strftime(tree) +NODE *tree; +{ + NODE *t1, *t2, *ret; + struct tm *tm; + time_t fclock; + char *bufp; + size_t buflen, bufsize; + char buf[BUFSIZ]; + static char def_format[] = "%a %b %d %H:%M:%S %Z %Y"; + char *format; + int formatlen; + + /* set defaults first */ + format = def_format; /* traditional date format */ + formatlen = strlen(format); + (void) time(&fclock); /* current time of day */ + + t1 = t2 = NULL; + if (tree != NULL) { /* have args */ + if (tree->lnode != NULL) { + t1 = force_string(tree_eval(tree->lnode)); + format = t1->stptr; + formatlen = t1->stlen; + if (formatlen == 0) { + if (do_lint) + warning("strftime called with empty format string"); + free_temp(t1); + return tmp_string("", 0); + } + } + + if (tree->rnode != NULL) { + t2 = tree_eval(tree->rnode->lnode); + fclock = (time_t) force_number(t2); + free_temp(t2); + } + } + + tm = localtime(&fclock); + + bufp = buf; + 
bufsize = sizeof(buf); + for (;;) { + *bufp = '\0'; + buflen = strftime(bufp, bufsize, format, tm); + /* + * buflen can be zero EITHER because there's not enough + * room in the string, or because the control command + * goes to the empty string. Make a reasonable guess that + * if the buffer is 1024 times bigger than the length of the + * format string, it's not failing for lack of room. + * Thanks to Paul Eggert for pointing out this issue. + */ + if (buflen > 0 || bufsize >= 1024 * formatlen) + break; + bufsize *= 2; + if (bufp == buf) + emalloc(bufp, char *, bufsize, "do_strftime"); + else + erealloc(bufp, char *, bufsize, "do_strftime"); + } + ret = tmp_string(bufp, buflen); + if (bufp != buf) + free(bufp); + if (t1) + free_temp(t1); + return ret; +} + +/* do_systime --- get the time of day */ + +NODE * +do_systime(tree) +NODE *tree; +{ + time_t lclock; + + (void) time(&lclock); + return tmp_number((AWKNUM) lclock); +} + + + +/* do_system --- run an external command */ + +NODE * +do_system(tree) +NODE *tree; +{ + NODE *tmp; + int ret = 0; + char *cmd; + char save; + + (void) flush_io(); /* so output is synchronous with gawk's */ + tmp = tree_eval(tree->lnode); + cmd = force_string(tmp)->stptr; + + if (cmd && *cmd) { + /* insure arg to system is zero-terminated */ + + /* + * From: David Trueman + * To: arnold@cc.gatech.edu (Arnold Robbins) + * Date: Wed, 3 Nov 1993 12:49:41 -0400 + * + * It may not be necessary to save the character, but + * I'm not sure. It would normally be the field + * separator. If the parse has not yet gone beyond + * that, it could mess up (although I doubt it). If + * FIELDWIDTHS is being used, it might be the first + * character of the next field. Unless someone wants + * to check it out exhaustively, I suggest saving it + * for now... 
+ */ + save = cmd[tmp->stlen]; + cmd[tmp->stlen] = '\0'; + + ret = system(cmd); + ret = (ret >> 8) & 0xff; + + cmd[tmp->stlen] = save; + } + free_temp(tmp); + return tmp_number((AWKNUM) ret); +} + +extern NODE **fmt_list; /* declared in eval.c */ + +/* do_print --- print items, separated by OFS, terminated with ORS */ + +void +do_print(tree) +register NODE *tree; +{ + register NODE **t; + struct redirect *rp = NULL; + register FILE *fp; + int numnodes, i; + NODE *save; + + if (tree->rnode) { + int errflg; /* not used, sigh */ + + rp = redirect(tree->rnode, &errflg); + if (rp != NULL) { + fp = rp->fp; + if (fp == NULL) + return; + } else + return; + } else + fp = stdout; + + /* + * General idea is to evaluate all the expressions first and + * then print them, otherwise you get suprising behavior. + * See test/prtoeval.awk for an example program. + */ + save = tree = tree->lnode; + for (numnodes = 0; tree != NULL; tree = tree->rnode) + numnodes++; + emalloc(t, NODE **, numnodes * sizeof(NODE *), "do_print"); + + tree = save; + for (i = 0; tree != NULL; i++, tree = tree->rnode) { + NODE *n; + + /* Here lies the wumpus. R.I.P. 
*/ + n = tree_eval(tree->lnode); + t[i] = dupnode(n); + free_temp(n); + + if (t[i]->flags & NUMBER) { + if (OFMTidx == CONVFMTidx) + (void) force_string(t[i]); + else + t[i] = format_val(OFMT, OFMTidx, t[i]); + } + } + + for (i = 0; i < numnodes; i++) { + efwrite(t[i]->stptr, sizeof(char), t[i]->stlen, fp, "print", rp, FALSE); + unref(t[i]); + if (i != numnodes - 1) { + if (OFSlen > 0) + efwrite(OFS, sizeof(char), (size_t) OFSlen, + fp, "print", rp, FALSE); + } + } + if (ORSlen > 0) + efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE); + free(t); +} + +/* do_tolower --- lower case a string */ + +NODE * +do_tolower(tree) +NODE *tree; +{ + NODE *t1, *t2; + register unsigned char *cp, *cp2; + + t1 = tree_eval(tree->lnode); + t1 = force_string(t1); + t2 = tmp_string(t1->stptr, t1->stlen); + for (cp = (unsigned char *)t2->stptr, + cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) + if (ISUPPER(*cp)) + *cp = tolower(*cp); + free_temp(t1); + return t2; +} + +/* do_toupper --- upper case a string */ + +NODE * +do_toupper(tree) +NODE *tree; +{ + NODE *t1, *t2; + register unsigned char *cp, *cp2; + + t1 = tree_eval(tree->lnode); + t1 = force_string(t1); + t2 = tmp_string(t1->stptr, t1->stlen); + for (cp = (unsigned char *)t2->stptr, + cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) + if (ISLOWER(*cp)) + *cp = toupper(*cp); + free_temp(t1); + return t2; +} + +/* do_atan2 --- do the atan2 function */ + +NODE * +do_atan2(tree) +NODE *tree; +{ + NODE *t1, *t2; + double d1, d2; + + t1 = tree_eval(tree->lnode); + t2 = tree_eval(tree->rnode->lnode); + d1 = force_number(t1); + d2 = force_number(t2); + free_temp(t1); + free_temp(t2); + return tmp_number((AWKNUM) atan2(d1, d2)); +} + +/* do_sin --- do the sin function */ + +NODE * +do_sin(tree) +NODE *tree; +{ + NODE *tmp; + double d; + + tmp = tree_eval(tree->lnode); + d = sin((double) force_number(tmp)); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +/* do_cos --- do the cos 
function */ + +NODE * +do_cos(tree) +NODE *tree; +{ + NODE *tmp; + double d; + + tmp = tree_eval(tree->lnode); + d = cos((double) force_number(tmp)); + free_temp(tmp); + return tmp_number((AWKNUM) d); +} + +/* do_rand --- do the rand function */ + +static int firstrand = TRUE; +static char state[512]; + +/* ARGSUSED */ +NODE * +do_rand(tree) +NODE *tree; +{ + if (firstrand) { + (void) initstate((unsigned) 1, state, sizeof state); + srandom(1); + firstrand = FALSE; + } + return tmp_number((AWKNUM) random() / GAWK_RANDOM_MAX); +} + +/* do_srand --- seed the random number generator */ + +NODE * +do_srand(tree) +NODE *tree; +{ + NODE *tmp; + static long save_seed = 1; + long ret = save_seed; /* SVR4 awk srand returns previous seed */ + + if (firstrand) { + (void) initstate((unsigned) 1, state, sizeof state); + /* don't need to srandom(1), we're changing the seed below */ + firstrand = FALSE; + } else + (void) setstate(state); + + if (tree == NULL) + srandom((unsigned int) (save_seed = (long) time((time_t *) 0))); + else { + tmp = tree_eval(tree->lnode); + srandom((unsigned int) (save_seed = (long) force_number(tmp))); + free_temp(tmp); + } + return tmp_number((AWKNUM) ret); +} + +/* do_match --- match a regexp, set RSTART and RLENGTH */ + +NODE * +do_match(tree) +NODE *tree; +{ + NODE *t1; + int rstart; + AWKNUM rlength; + Regexp *rp; + + t1 = force_string(tree_eval(tree->lnode)); + tree = tree->rnode->lnode; + rp = re_update(tree); + rstart = research(rp, t1->stptr, 0, t1->stlen, TRUE); + if (rstart >= 0) { /* match succeded */ + rstart++; /* 1-based indexing */ + rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); + } else { /* match failed */ + rstart = 0; + rlength = -1.0; + } + free_temp(t1); + unref(RSTART_node->var_value); + RSTART_node->var_value = make_number((AWKNUM) rstart); + unref(RLENGTH_node->var_value); + RLENGTH_node->var_value = make_number(rlength); + return tmp_number((AWKNUM) rstart); +} + +/* sub_common --- the common code (does the work) for 
sub, gsub, and gensub */ + +/* + * Gsub can be tricksy; particularly when handling the case of null strings. + * The following awk code was useful in debugging problems. It is too bad + * that it does not readily translate directly into the C code, below. + * + * #! /usr/local/bin/mawk -f + * + * BEGIN { + * TRUE = 1; FALSE = 0 + * print "--->", mygsub("abc", "b+", "FOO") + * print "--->", mygsub("abc", "x*", "X") + * print "--->", mygsub("abc", "b*", "X") + * print "--->", mygsub("abc", "c", "X") + * print "--->", mygsub("abc", "c+", "X") + * print "--->", mygsub("abc", "x*$", "X") + * } + * + * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag) + * { + * origstr = str; + * eosflag = nonzeroflag = FALSE + * while (match(str, regex)) { + * if (RLENGTH > 0) { # easy case + * nonzeroflag = TRUE + * if (RSTART == 1) { # match at front of string + * newstr = newstr replace + * } else { + * newstr = newstr substr(str, 1, RSTART-1) replace + * } + * str = substr(str, RSTART+RLENGTH) + * } else if (nonzeroflag) { + * # last match was non-zero in length, and at the + * # current character, we get a zero length match, + * # which we don't really want, so skip over it + * newstr = newstr substr(str, 1, 1) + * str = substr(str, 2) + * nonzeroflag = FALSE + * } else { + * # 0-length match + * if (RSTART == 1) { + * newstr = newstr replace substr(str, 1, 1) + * str = substr(str, 2) + * } else { + * return newstr str replace + * } + * } + * if (length(str) == 0) + * if (eosflag) + * break; + * else + * eosflag = TRUE + * } + * if (length(str) > 0) + * newstr = newstr str # rest of string + * + * return newstr + * } + */ + +/* + * NB: `howmany' conflicts with a SunOS macro in . 
+ */ + +static NODE * +sub_common(tree, how_many, backdigs) +NODE *tree; +int how_many, backdigs; +{ + register char *scan; + register char *bp, *cp; + char *buf; + size_t buflen; + register char *matchend; + register size_t len; + char *matchstart; + char *text; + size_t textlen; + char *repl; + char *replend; + size_t repllen; + int sofar; + int ampersands; + int matches = 0; + Regexp *rp; + NODE *s; /* subst. pattern */ + NODE *t; /* string to make sub. in; $0 if none given */ + NODE *tmp; + NODE **lhs = &tree; /* value not used -- just different from NULL */ + int priv = FALSE; + Func_ptr after_assign = NULL; + + int global = (how_many == -1); + long current; + int lastmatchnonzero; + + tmp = tree->lnode; + rp = re_update(tmp); + + tree = tree->rnode; + s = tree->lnode; + + tree = tree->rnode; + tmp = tree->lnode; + t = force_string(tree_eval(tmp)); + + /* do the search early to avoid work on non-match */ + if (research(rp, t->stptr, 0, t->stlen, TRUE) == -1 || + RESTART(rp, t->stptr) > t->stlen) { + free_temp(t); + return tmp_number((AWKNUM) 0.0); + } + + if (tmp->type == Node_val) + lhs = NULL; + else + lhs = get_lhs(tmp, &after_assign); + t->flags |= STRING; + /* + * create a private copy of the string + */ + if (t->stref > 1 || (t->flags & (PERM|FIELD)) != 0) { + unsigned int saveflags; + + saveflags = t->flags; + t->flags &= ~MALLOC; + tmp = dupnode(t); + t->flags = saveflags; + t = tmp; + priv = TRUE; + } + text = t->stptr; + textlen = t->stlen; + buflen = textlen + 2; + + s = force_string(tree_eval(s)); + repl = s->stptr; + replend = repl + s->stlen; + repllen = replend - repl; + emalloc(buf, char *, buflen + 2, "sub_common"); + buf[buflen] = '\0'; + buf[buflen + 1] = '\0'; + ampersands = 0; + for (scan = repl; scan < replend; scan++) { + if (*scan == '&') { + repllen--; + ampersands++; + } else if (*scan == '\\') { + if (backdigs) { /* gensub, behave sanely */ + if (ISDIGIT(scan[1])) { + ampersands++; + scan++; + } else { /* \q for any q --> q */ + 
repllen--; + scan++; + } + } else { /* (proposed) posix '96 mode */ + if (strncmp(scan, "\\\\\\&", 4) == 0) { + /* \\\& --> \& */ + repllen -= 2; + scan += 3; + } else if (strncmp(scan, "\\\\&", 3) == 0) { + /* \\& --> \ */ + ampersands++; + repllen--; + scan += 2; + } else if (scan[1] == '&') { + /* \& --> & */ + repllen--; + scan++; + } /* else + leave alone, it goes into the output */ + } + } + } + + lastmatchnonzero = FALSE; + bp = buf; + for (current = 1;; current++) { + matches++; + matchstart = t->stptr + RESTART(rp, t->stptr); + matchend = t->stptr + REEND(rp, t->stptr); + + /* + * create the result, copying in parts of the original + * string + */ + len = matchstart - text + repllen + + ampersands * (matchend - matchstart); + sofar = bp - buf; + while (buflen < (sofar + len + 1)) { + buflen *= 2; + erealloc(buf, char *, buflen, "sub_common"); + bp = buf + sofar; + } + for (scan = text; scan < matchstart; scan++) + *bp++ = *scan; + if (global || current == how_many) { + /* + * If the current match matched the null string, + * and the last match didn't and did a replacement, + * then skip this one. + */ + if (lastmatchnonzero && matchstart == matchend) { + lastmatchnonzero = FALSE; + goto empty; + } + /* + * If replacing all occurrences, or this is the + * match we want, copy in the replacement text, + * making substitutions as we go. 
+ */ + for (scan = repl; scan < replend; scan++) + if (*scan == '&') + for (cp = matchstart; cp < matchend; cp++) + *bp++ = *cp; + else if (*scan == '\\') { + if (backdigs) { /* gensub, behave sanely */ + if (ISDIGIT(scan[1])) { + int dig = scan[1] - '0'; + char *start, *end; + + start = t->stptr + + SUBPATSTART(rp, t->stptr, dig); + end = t->stptr + + SUBPATEND(rp, t->stptr, dig); + + for (cp = start; cp < end; cp++) + *bp++ = *cp; + scan++; + } else /* \q for any q --> q */ + *bp++ = *++scan; + } else { /* posix '96 mode, bleah */ + if (strncmp(scan, "\\\\\\&", 4) == 0) { + /* \\\& --> \& */ + *bp++ = '\\'; + *bp++ = '&'; + scan += 3; + } else if (strncmp(scan, "\\\\&", 3) == 0) { + /* \\& --> \ */ + *bp++ = '\\'; + for (cp = matchstart; cp < matchend; cp++) + *bp++ = *cp; + scan += 2; + } else if (scan[1] == '&') { + /* \& --> & */ + *bp++ = '&'; + scan++; + } else + *bp++ = *scan; + } + } else + *bp++ = *scan; + if (matchstart != matchend) + lastmatchnonzero = TRUE; + } else { + /* + * don't want this match, skip over it by copying + * in current text. + */ + for (cp = matchstart; cp < matchend; cp++) + *bp++ = *cp; + } + empty: + /* catch the case of gsub(//, "blah", whatever), i.e. 
empty regexp */ + if (matchstart == matchend && matchend < text + textlen) { + *bp++ = *matchend; + matchend++; + } + textlen = text + textlen - matchend; + text = matchend; + + if ((current >= how_many && !global) + || ((long) textlen <= 0 && matchstart == matchend) + || research(rp, t->stptr, text - t->stptr, textlen, TRUE) == -1) + break; + + } + sofar = bp - buf; + if (buflen - sofar - textlen - 1) { + buflen = sofar + textlen + 2; + erealloc(buf, char *, buflen, "sub_common"); + bp = buf + sofar; + } + for (scan = matchend; scan < text + textlen; scan++) + *bp++ = *scan; + *bp = '\0'; + textlen = bp - buf; + free(t->stptr); + t->stptr = buf; + t->stlen = textlen; + + free_temp(s); + if (matches > 0 && lhs) { + if (priv) { + unref(*lhs); + *lhs = t; + } + if (after_assign != NULL) + (*after_assign)(); + t->flags &= ~(NUM|NUMBER); + } + return tmp_number((AWKNUM) matches); +} + +/* do_gsub --- global substitution */ + +NODE * +do_gsub(tree) +NODE *tree; +{ + return sub_common(tree, -1, FALSE); +} + +/* do_sub --- single substitution */ + +NODE * +do_sub(tree) +NODE *tree; +{ + return sub_common(tree, 1, FALSE); +} + +/* do_gensub --- fix up the tree for sub_common for the gensub function */ + +NODE * +do_gensub(tree) +NODE *tree; +{ + NODE n1, n2, n3, *t, *tmp, *target, *ret; + long how_many = 1; /* default is one substitution */ + double d; + + /* + * We have to pull out the value of the global flag, and + * build up a tree without the flag in it, turning it into the + * kind of tree that sub_common() expects. It helps to draw + * a picture of this ... + */ + n1 = *tree; + n2 = *(tree->rnode); + n1.rnode = & n2; + + t = tree_eval(n2.rnode->lnode); /* value of global flag */ + + tmp = force_string(tree_eval(n2.rnode->rnode->lnode)); /* target */ + + /* + * We make copy of the original target string, and pass that + * in to sub_common() as the target to make the substitution in. + * We will then return the result string as the return value of + * this function. 
+ */ + target = make_string(tmp->stptr, tmp->stlen); + free_temp(tmp); + + n3 = *(n2.rnode->rnode); + n3.lnode = target; + n2.rnode = & n3; + + if ((t->flags & (STR|STRING)) != 0) { + if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G')) + how_many = -1; + else + how_many = 1; + } else { + d = force_number(t); + if (d > 0) + how_many = d; + else + how_many = 1; + } + + free_temp(t); + + ret = sub_common(&n1, how_many, TRUE); + free_temp(ret); + + /* + * Note that we don't care what sub_common() returns, since the + * easiest thing for the programmer is to return the string, even + * if no substitutions were done. + */ + target->flags |= TEMP; + return target; +} + +#ifdef GFMT_WORKAROUND +/* + * printf's %g format [can't rely on gcvt()] + * caveat: don't use as argument to *printf()! + * 'format' string HAS to be of "*.*g" kind, or we bomb! + */ +static void +sgfmt(buf, format, alt, fwidth, prec, g) +char *buf; /* return buffer; assumed big enough to hold result */ +const char *format; +int alt; /* use alternate form flag */ +int fwidth; /* field width in a format */ +int prec; /* indicates desired significant digits, not decimal places */ +double g; /* value to format */ +{ + char dform[40]; + register char *gpos; + register char *d, *e, *p; + int again = FALSE; + + strncpy(dform, format, sizeof dform - 1); + dform[sizeof dform - 1] = '\0'; + gpos = strrchr(dform, '.'); + + if (g == 0.0 && ! alt) { /* easy special case */ + *gpos++ = 'd'; + *gpos = '\0'; + (void) sprintf(buf, dform, fwidth, 0); + return; + } + + /* advance to location of 'g' in the format */ + while (*gpos && *gpos != 'g' && *gpos != 'G') + gpos++; + + if (prec <= 0) /* negative precision is ignored */ + prec = (prec < 0 ? 
DEFAULT_G_PRECISION : 1); + + if (*gpos == 'G') + again = TRUE; + /* start with 'e' format (it'll provide nice exponent) */ + *gpos = 'e'; + prec--; + (void) sprintf(buf, dform, fwidth, prec, g); + if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */ + int expn = atoi(e+1); /* fetch exponent */ + if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */ + /* switch to 'f' format and re-do */ + *gpos = 'f'; + prec -= expn; /* decimal precision */ + (void) sprintf(buf, dform, fwidth, prec, g); + e = buf + strlen(buf); + while (*--e == ' ') + continue; + e++; + } + else if (again) + *gpos = 'E'; + + /* if 'alt' in force, then trailing zeros are not removed */ + if (! alt && (d = strrchr(buf, '.')) != NULL) { + /* throw away an excess of precision */ + for (p = e; p > d && *--p == '0'; ) + prec--; + if (d == p) + prec--; + if (prec < 0) + prec = 0; + /* and do that once again */ + again = TRUE; + } + if (again) + (void) sprintf(buf, dform, fwidth, prec, g); + } +} +#endif /* GFMT_WORKAROUND */ + +#ifdef BITOPS +#define BITS_PER_BYTE 8 /* if not true, you lose. too bad. 
*/ + +/* do_lshift --- perform a << operation */ + +NODE * +do_lshift(tree) +NODE *tree; +{ + NODE *s1, *s2; + unsigned long uval, ushift, result; + AWKNUM val, shift; + + s1 = tree_eval(tree->lnode); + s2 = tree_eval(tree->rnode->lnode); + val = force_number(s1); + shift = force_number(s2); + free_temp(s1); + free_temp(s2); + + if (do_lint) { + if (val < 0 || shift < 0) + warning("lshift(%lf, %lf): negative values will give strange results", val, shift); + if (double_to_int(val) != val || double_to_int(shift) != shift) + warning("lshift(%lf, %lf): fractional values will be truncated", val, shift); + if (shift > (sizeof(unsigned long) * BITS_PER_BYTE)) + warning("lshift(%lf, %lf): too large shift value will give strange results", val, shift); + } + + uval = (unsigned long) val; + ushift = (unsigned long) shift; + + result = uval << ushift; + return tmp_number((AWKNUM) result); +} + +/* do_rshift --- perform a >> operation */ + +NODE * +do_rshift(tree) +NODE *tree; +{ + NODE *s1, *s2; + unsigned long uval, ushift, result; + AWKNUM val, shift; + + s1 = tree_eval(tree->lnode); + s2 = tree_eval(tree->rnode->lnode); + val = force_number(s1); + shift = force_number(s2); + free_temp(s1); + free_temp(s2); + + if (do_lint) { + if (val < 0 || shift < 0) + warning("rshift(%lf, %lf): negative values will give strange results", val, shift); + if (double_to_int(val) != val || double_to_int(shift) != shift) + warning("rshift(%lf, %lf): fractional values will be truncated", val, shift); + if (shift > (sizeof(unsigned long) * BITS_PER_BYTE)) + warning("rshift(%lf, %lf): too large shift value will give strange results", val, shift); + } + + uval = (unsigned long) val; + ushift = (unsigned long) shift; + + result = uval >> ushift; + return tmp_number((AWKNUM) result); +} + +/* do_and --- perform an & operation */ + +NODE * +do_and(tree) +NODE *tree; +{ + NODE *s1, *s2; + unsigned long uleft, uright, result; + AWKNUM left, right; + + s1 = tree_eval(tree->lnode); + s2 = 
tree_eval(tree->rnode->lnode); + left = force_number(s1); + right = force_number(s2); + free_temp(s1); + free_temp(s2); + + if (do_lint) { + if (left < 0 || right < 0) + warning("and(%lf, %lf): negative values will give strange results", left, right); + if (double_to_int(left) != left || double_to_int(right) != right) + warning("and(%lf, %lf): fractional values will be truncated", left, right); + } + + uleft = (unsigned long) left; + uright = (unsigned long) right; + + result = uleft & uright; + return tmp_number((AWKNUM) result); +} + +/* do_or --- perform an | operation */ + +NODE * +do_or(tree) +NODE *tree; +{ + NODE *s1, *s2; + unsigned long uleft, uright, result; + AWKNUM left, right; + + s1 = tree_eval(tree->lnode); + s2 = tree_eval(tree->rnode->lnode); + left = force_number(s1); + right = force_number(s2); + free_temp(s1); + free_temp(s2); + + if (do_lint) { + if (left < 0 || right < 0) + warning("or(%lf, %lf): negative values will give strange results", left, right); + if (double_to_int(left) != left || double_to_int(right) != right) + warning("or(%lf, %lf): fractional values will be truncated", left, right); + } + + uleft = (unsigned long) left; + uright = (unsigned long) right; + + result = uleft | uright; + return tmp_number((AWKNUM) result); +} + +/* do_xor --- perform an ^ operation */ + +NODE * +do_xor(tree) +NODE *tree; +{ + NODE *s1, *s2; + unsigned long uleft, uright, result; + AWKNUM left, right; + + s1 = tree_eval(tree->lnode); + s2 = tree_eval(tree->rnode->lnode); + left = force_number(s1); + right = force_number(s2); + free_temp(s1); + free_temp(s2); + + if (do_lint) { + if (left < 0 || right < 0) + warning("xor(%lf, %lf): negative values will give strange results", left, right); + if (double_to_int(left) != left || double_to_int(right) != right) + warning("xor(%lf, %lf): fractional values will be truncated", left, right); + } + + uleft = (unsigned long) left; + uright = (unsigned long) right; + + result = uleft ^ uright; + return 
tmp_number((AWKNUM) result); +} + +/* do_compl --- perform a ~ operation */ + +NODE * +do_compl(tree) +NODE *tree; +{ + NODE *tmp; + double d; + unsigned long uval; + + tmp = tree_eval(tree->lnode); + d = force_number(tmp); + free_temp(tmp); + + if (do_lint) { + if (uval < 0) + warning("compl(%lf): negative value will give strange results", d); + if (double_to_int(d) != d) + warning("compl(%lf): fractional value will be truncated", d); + } + + uval = (unsigned long) d; + uval = ~ uval; + return tmp_number((AWKNUM) uval); +} + +/* do_strtonum --- the strtonum function */ + +NODE * +do_strtonum(tree) +NODE *tree; +{ + NODE *tmp; + double d, arg; + + tmp = tree_eval(tree->lnode); + + if ((tmp->flags & (NUM|NUMBER)) != 0) + d = (double) force_number(tmp); + else if (isnondecimal(tmp->stptr)) + d = nondec2awknum(tmp->stptr, tmp->stlen); + else + d = (double) force_number(tmp); + + free_temp(tmp); + return tmp_number((AWKNUM) d); +} +#endif /* BITOPS */ + +#if defined(BITOPS) || defined(NONDECDATA) +/* nondec2awknum --- convert octal or hex value to double */ + +/* + * Because of awk's concatenation rules and the way awk.y:yylex() + * collects a number, this routine has to be willing to stop on the + * first invalid character. + */ + +AWKNUM +nondec2awknum(str, len) +char *str; +size_t len; +{ + AWKNUM retval = 0.0; + char save; + short val; + + if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) { + assert(len > 2); + + for (str += 2, len -= 2; len > 0; len--, str++) { + switch (*str) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + val = *str - '0'; + break; + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + val = *str - 'a' + 10; + break; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + val = *str - 'A' + 10; + break; + default: + goto done; + } + retval = (retval * 16) + val; + } + } else if (*str == '0') { + for (; len > 0; len--) { + if (! 
isdigit(*str) || *str == '8' || *str == '9') + goto done; + retval = (retval * 8) + (*str - '0'); + str++; + } + } else { + save = str[len]; + retval = atof(str); + str[len] = save; + } +done: + return retval; +} +#endif /* defined(BITOPS) || defined(NONDECDATA) */ diff --git a/contrib/awk/config.h b/contrib/awk/config.h new file mode 100644 index 0000000..c745db1 --- /dev/null +++ b/contrib/awk/config.h @@ -0,0 +1,207 @@ +/* config.h. Generated automatically by configure. */ +/* configh.in. Generated automatically from configure.in by autoheader. */ +/* + * acconfig.h -- configuration definitions for gawk. + */ + +/* + * Copyright (C) 1995-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +/* Define if on AIX 3. + System headers sometimes define this. + We just want to avoid a redefinition error message. */ +#ifndef _ALL_SOURCE +/* #undef _ALL_SOURCE */ +#endif + +/* Define if using alloca.c. */ +/* #undef C_ALLOCA */ + +/* Define if type char is unsigned and you are not using gcc. */ +#ifndef __CHAR_UNSIGNED__ +/* #undef __CHAR_UNSIGNED__ */ +#endif + +/* Define to empty if the keyword does not work. 
*/ +/* #undef const */ + +/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems. + This function is required for alloca.c support on those systems. */ +/* #undef CRAY_STACKSEG_END */ + +/* Define to the type of elements in the array set by `getgroups'. + Usually this is either `int' or `gid_t'. */ +#define GETGROUPS_T gid_t + +/* Define if the `getpgrp' function takes no argument. */ +#define GETPGRP_VOID 1 + +/* Define to `int' if doesn't define. */ +/* #undef gid_t */ + +/* Define if you have alloca, as a function or macro. */ +#define HAVE_ALLOCA 1 + +/* Define if you have and it should be used (not on Ultrix). */ +/* #undef HAVE_ALLOCA_H */ + +/* Define if you don't have vprintf but do have _doprnt. */ +/* #undef HAVE_DOPRNT */ + +/* Define if you have a working `mmap' system call. */ +#define HAVE_MMAP 1 + +/* Define if your struct stat has st_blksize. */ +#define HAVE_ST_BLKSIZE 1 + +/* Define if you have that is POSIX.1 compatible. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define if your struct tm has tm_zone. */ +#define HAVE_TM_ZONE 1 + +/* Define if you don't have tm_zone but do have the external array + tzname. */ +/* #undef HAVE_TZNAME */ + +/* Define if you have the vprintf function. */ +#define HAVE_VPRINTF 1 + +/* Define if on MINIX. */ +/* #undef _MINIX */ + +/* Define to `int' if doesn't define. */ +/* #undef pid_t */ + +/* Define if the system does not provide POSIX.1 features except + with this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define if you need to in order for stat and other things to work. */ +/* #undef _POSIX_SOURCE */ + +/* Define as the return type of signal handlers (int or void). */ +#define RETSIGTYPE void + +/* Define to `unsigned' if doesn't define. */ +/* #undef size_t */ + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at run-time. 
+ STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown + */ +/* #undef STACK_DIRECTION */ + +/* Define if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define if you can safely include both and . */ +#define TIME_WITH_SYS_TIME 1 + +/* Define if your declares struct tm. */ +/* #undef TM_IN_SYS_TIME */ + +/* Define to `int' if doesn't define. */ +/* #undef uid_t */ + +#define HAVE_STRINGIZE 1 /* can use ANSI # operator in cpp */ +#define REGEX_MALLOC 1 /* use malloc instead of alloca in regex.c */ +#define SPRINTF_RET int /* return type of sprintf */ +/* #undef BITOPS */ /* bitwise ops (undocumented feature) */ +/* #undef NONDECDATA */ /* non-decimal input data (undocumented feature) */ + +/* Define if you have the fmod function. */ +#define HAVE_FMOD 1 + +/* Define if you have the getpagesize function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define if you have the madvise function. */ +#define HAVE_MADVISE 1 + +/* Define if you have the memcmp function. */ +#define HAVE_MEMCMP 1 + +/* Define if you have the memcpy function. */ +#define HAVE_MEMCPY 1 + +/* Define if you have the memset function. */ +#define HAVE_MEMSET 1 + +/* Define if you have the setlocale function. */ +#define HAVE_SETLOCALE 1 + +/* Define if you have the strchr function. */ +#define HAVE_STRCHR 1 + +/* Define if you have the strerror function. */ +#define HAVE_STRERROR 1 + +/* Define if you have the strftime function. */ +#define HAVE_STRFTIME 1 + +/* Define if you have the strncasecmp function. */ +#define HAVE_STRNCASECMP 1 + +/* Define if you have the strtod function. */ +#define HAVE_STRTOD 1 + +/* Define if you have the system function. */ +#define HAVE_SYSTEM 1 + +/* Define if you have the tzset function. */ +#define HAVE_TZSET 1 + +/* Define if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define if you have the header file. 
*/ +#define HAVE_LOCALE_H 1 + +/* Define if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define if you have the header file. */ +/* #undef HAVE_SIGNUM_H */ + +/* Define if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define if you have the header file. */ +/* #undef HAVE_STRINGS_H */ + +/* Define if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define if you have the m library (-lm). */ +#define HAVE_LIBM 1 + +#include /* overrides for stuff autoconf can't deal with */ diff --git a/contrib/awk/custom.h b/contrib/awk/custom.h new file mode 100644 index 0000000..833bb62 --- /dev/null +++ b/contrib/awk/custom.h @@ -0,0 +1,59 @@ +/* + * custom.h + * + * This file is for use on systems where Autoconf isn't quite able to + * get things right. It is included after config.h in awk.h, to override + * definitions from Autoconf that are erroneous. See the manual for more + * information. + * + * If you make additions to this file for your system, please send me + * the information, to arnold@gnu.ai.mit.edu. + */ + +/* + * Copyright (C) 1995-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/* for MIPS RiscOS, from Nelson H. F. Beebe, beebe@math.utah.edu */ +#if defined(__host_mips) && defined(SYSTYPE_BSD43) +#undef HAVE_STRTOD +#undef HAVE_STRERROR +#endif + +/* for VMS POSIX, from Pat Rankin, rankin@eql.caltech.edu */ +#ifdef VMS_POSIX +#undef VMS +#include "vms/redirect.h" +#endif + +/* For QNX, based on submission from Michael Hunter, mphunter@qnx.com */ +#ifdef __QNX__ +#define GETPGRP_VOID 1 +#endif + +/* For Amigas, from Fred Fish, fnf@ninemoons.com */ +#ifdef __amigaos__ +#define fork vfork +#endif + +/* For sequent, based on email with Aron Griffis */ +#ifdef _SEQUENT_ +#undef HAVE_MMAP +#endif diff --git a/contrib/awk/dfa.c b/contrib/awk/dfa.c new file mode 100644 index 0000000..03dd9e7 --- /dev/null +++ b/contrib/awk/dfa.c @@ -0,0 +1,2606 @@ +/* dfa.c - deterministic extended regexp routines for GNU + Copyright (C) 1988 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ + +/* Written June, 1988 by Mike Haertel + Modified July, 1988 by Arthur David Olson to assist BMG speedups */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#ifdef STDC_HEADERS +#include +#else +#include +extern char *calloc(), *malloc(), *realloc(); +extern void free(); +#endif + +#if defined(HAVE_STRING_H) || defined(STDC_HEADERS) +#include +#undef index +#define index strchr +#else +#include +#endif + +#ifndef DEBUG /* use the same approach as regex.c */ +#undef assert +#define assert(e) +#endif /* DEBUG */ + +#ifndef isgraph +#define isgraph(C) (isprint(C) && !isspace(C)) +#endif + +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) +#define ISALPHA(C) isalpha(C) +#define ISUPPER(C) isupper(C) +#define ISLOWER(C) islower(C) +#define ISDIGIT(C) isdigit(C) +#define ISXDIGIT(C) isxdigit(C) +#define ISSPACE(C) isspace(C) +#define ISPUNCT(C) ispunct(C) +#define ISALNUM(C) isalnum(C) +#define ISPRINT(C) isprint(C) +#define ISGRAPH(C) isgraph(C) +#define ISCNTRL(C) iscntrl(C) +#else +#define ISALPHA(C) (isascii(C) && isalpha(C)) +#define ISUPPER(C) (isascii(C) && isupper(C)) +#define ISLOWER(C) (isascii(C) && islower(C)) +#define ISDIGIT(C) (isascii(C) && isdigit(C)) +#define ISXDIGIT(C) (isascii(C) && isxdigit(C)) +#define ISSPACE(C) (isascii(C) && isspace(C)) +#define ISPUNCT(C) (isascii(C) && ispunct(C)) +#define ISALNUM(C) (isascii(C) && isalnum(C)) +#define ISPRINT(C) (isascii(C) && isprint(C)) +#define ISGRAPH(C) (isascii(C) && isgraph(C)) +#define ISCNTRL(C) (isascii(C) && iscntrl(C)) +#endif + +#include "regex.h" +#include "dfa.h" + +#ifdef __STDC__ +typedef void *ptr_t; +#else +typedef char *ptr_t; +#ifndef const +#define const +#endif +#endif + +static void dfamust _RE_ARGS((struct dfa *dfa)); 
+ +static ptr_t xcalloc _RE_ARGS((size_t n, size_t s)); +static ptr_t xmalloc _RE_ARGS((size_t n)); +static ptr_t xrealloc _RE_ARGS((ptr_t p, size_t n)); +#ifdef DEBUG +static void prtok _RE_ARGS((token t)); +#endif +static int tstbit _RE_ARGS((int b, charclass c)); +static void setbit _RE_ARGS((int b, charclass c)); +static void clrbit _RE_ARGS((int b, charclass c)); +static void copyset _RE_ARGS((charclass src, charclass dst)); +static void zeroset _RE_ARGS((charclass s)); +static void notset _RE_ARGS((charclass s)); +static int equal _RE_ARGS((charclass s1, charclass s2)); +static int charclass_index _RE_ARGS((charclass s)); +static int looking_at _RE_ARGS((const char *s)); +static token lex _RE_ARGS((void)); +static void addtok _RE_ARGS((token t)); +static void atom _RE_ARGS((void)); +static int nsubtoks _RE_ARGS((int tindex)); +static void copytoks _RE_ARGS((int tindex, int ntokens)); +static void closure _RE_ARGS((void)); +static void branch _RE_ARGS((void)); +static void regexp _RE_ARGS((int toplevel)); +static void copy _RE_ARGS((position_set *src, position_set *dst)); +static void insert _RE_ARGS((position p, position_set *s)); +static void merge _RE_ARGS((position_set *s1, position_set *s2, position_set *m)); +static void delete _RE_ARGS((position p, position_set *s)); +static int state_index _RE_ARGS((struct dfa *d, position_set *s, + int newline, int letter)); +static void build_state _RE_ARGS((int s, struct dfa *d)); +static void build_state_zero _RE_ARGS((struct dfa *d)); +static char *icatalloc _RE_ARGS((char *old, char *new)); +static char *icpyalloc _RE_ARGS((char *string)); +static char *istrstr _RE_ARGS((char *lookin, char *lookfor)); +static void ifree _RE_ARGS((char *cp)); +static void freelist _RE_ARGS((char **cpp)); +static char **enlist _RE_ARGS((char **cpp, char *new, size_t len)); +static char **comsubs _RE_ARGS((char *left, char *right)); +static char **addlists _RE_ARGS((char **old, char **new)); +static char **inboth _RE_ARGS((char 
**left, char **right)); + +static ptr_t +xcalloc(n, s) + size_t n; + size_t s; +{ + ptr_t r = calloc(n, s); + + if (!r) + dfaerror("Memory exhausted"); + return r; +} + +static ptr_t +xmalloc(n) + size_t n; +{ + ptr_t r = malloc(n); + + assert(n != 0); + if (!r) + dfaerror("Memory exhausted"); + return r; +} + +static ptr_t +xrealloc(p, n) + ptr_t p; + size_t n; +{ + ptr_t r = realloc(p, n); + + assert(n != 0); + if (!r) + dfaerror("Memory exhausted"); + return r; +} + +#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t))) +#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t))) +#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t))) + +/* Reallocate an array of type t if nalloc is too small for index. */ +#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \ + if ((index) >= (nalloc)) \ + { \ + while ((index) >= (nalloc)) \ + (nalloc) *= 2; \ + REALLOC(p, t, nalloc); \ + } + +#ifdef DEBUG + +static void +prtok(t) + token t; +{ + char *s; + + if (t < 0) + fprintf(stderr, "END"); + else if (t < NOTCHAR) + fprintf(stderr, "%c", t); + else + { + switch (t) + { + case EMPTY: s = "EMPTY"; break; + case BACKREF: s = "BACKREF"; break; + case BEGLINE: s = "BEGLINE"; break; + case ENDLINE: s = "ENDLINE"; break; + case BEGWORD: s = "BEGWORD"; break; + case ENDWORD: s = "ENDWORD"; break; + case LIMWORD: s = "LIMWORD"; break; + case NOTLIMWORD: s = "NOTLIMWORD"; break; + case QMARK: s = "QMARK"; break; + case STAR: s = "STAR"; break; + case PLUS: s = "PLUS"; break; + case CAT: s = "CAT"; break; + case OR: s = "OR"; break; + case ORTOP: s = "ORTOP"; break; + case LPAREN: s = "LPAREN"; break; + case RPAREN: s = "RPAREN"; break; + default: s = "CSET"; break; + } + fprintf(stderr, "%s", s); + } +} +#endif /* DEBUG */ + +/* Stuff pertaining to charclasses. 
*/ + +static int +tstbit(b, c) + int b; + charclass c; +{ + return c[b / INTBITS] & 1 << b % INTBITS; +} + +static void +setbit(b, c) + int b; + charclass c; +{ + c[b / INTBITS] |= 1 << b % INTBITS; +} + +static void +clrbit(b, c) + int b; + charclass c; +{ + c[b / INTBITS] &= ~(1 << b % INTBITS); +} + +static void +copyset(src, dst) + charclass src; + charclass dst; +{ + int i; + + for (i = 0; i < CHARCLASS_INTS; ++i) + dst[i] = src[i]; +} + +static void +zeroset(s) + charclass s; +{ + int i; + + for (i = 0; i < CHARCLASS_INTS; ++i) + s[i] = 0; +} + +static void +notset(s) + charclass s; +{ + int i; + + for (i = 0; i < CHARCLASS_INTS; ++i) + s[i] = ~s[i]; +} + +static int +equal(s1, s2) + charclass s1; + charclass s2; +{ + int i; + + for (i = 0; i < CHARCLASS_INTS; ++i) + if (s1[i] != s2[i]) + return 0; + return 1; +} + +/* A pointer to the current dfa is kept here during parsing. */ +static struct dfa *dfa; + +/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ +static int +charclass_index(s) + charclass s; +{ + int i; + + for (i = 0; i < dfa->cindex; ++i) + if (equal(s, dfa->charclasses[i])) + return i; + REALLOC_IF_NECESSARY(dfa->charclasses, charclass, dfa->calloc, dfa->cindex); + ++dfa->cindex; + copyset(s, dfa->charclasses[i]); + return i; +} + +/* Syntax bits controlling the behavior of the lexical analyzer. */ +static reg_syntax_t syntax_bits, syntax_bits_set; + +/* Flag for case-folding letters into sets. */ +static int case_fold; + +/* Entry point to set syntax options. */ +void +dfasyntax(bits, fold) + reg_syntax_t bits; + int fold; +{ + syntax_bits_set = 1; + syntax_bits = bits; + case_fold = fold; +} + +/* Lexical analyzer. All the dross that deals with the obnoxious + GNU Regex syntax bits is located here. The poor, suffering + reader is referred to the GNU Regex documentation for the + meaning of the @#%!@#%^!@ syntax bits. */ + +static char *lexstart; /* Pointer to beginning of input string. 
*/ +static char *lexptr; /* Pointer to next input character. */ +static int lexleft; /* Number of characters remaining. */ +static token lasttok; /* Previous token returned; initially END. */ +static int laststart; /* True if we're separated from beginning or (, | + only by zero-width characters. */ +static int parens; /* Count of outstanding left parens. */ +static int minrep, maxrep; /* Repeat counts for {m,n}. */ + +/* Note that characters become unsigned here. */ +#define FETCH(c, eoferr) \ + { \ + if (! lexleft) \ + if (eoferr != 0) \ + dfaerror(eoferr); \ + else \ + return lasttok = END; \ + (c) = (unsigned char) *lexptr++; \ + --lexleft; \ + } + +#ifdef __STDC__ +#define FUNC(F, P) static int F(int c) { return P(c); } +#else +#define FUNC(F, P) static int F(c) int c; { return P(c); } +#endif + +FUNC(is_alpha, ISALPHA) +FUNC(is_upper, ISUPPER) +FUNC(is_lower, ISLOWER) +FUNC(is_digit, ISDIGIT) +FUNC(is_xdigit, ISXDIGIT) +FUNC(is_space, ISSPACE) +FUNC(is_punct, ISPUNCT) +FUNC(is_alnum, ISALNUM) +FUNC(is_print, ISPRINT) +FUNC(is_graph, ISGRAPH) +FUNC(is_cntrl, ISCNTRL) + +static int is_blank(c) +int c; +{ + return (c == ' ' || c == '\t'); +} + +/* The following list maps the names of the Posix named character classes + to predicate functions that determine whether a given character is in + the class. The leading [ has already been eaten by the lexical analyzer. 
*/ +static struct { + const char *name; + int (*pred) _RE_ARGS((int)); +} prednames[] = { + { ":alpha:]", is_alpha }, + { ":upper:]", is_upper }, + { ":lower:]", is_lower }, + { ":digit:]", is_digit }, + { ":xdigit:]", is_xdigit }, + { ":space:]", is_space }, + { ":punct:]", is_punct }, + { ":alnum:]", is_alnum }, + { ":print:]", is_print }, + { ":graph:]", is_graph }, + { ":cntrl:]", is_cntrl }, + { ":blank:]", is_blank }, + { 0 } +}; + +static int +looking_at(s) + const char *s; +{ + size_t len; + + len = strlen(s); + if (lexleft < len) + return 0; + return strncmp(s, lexptr, len) == 0; +} + +static token +lex() +{ + token c, c1, c2; + int backslash = 0, invert; + charclass ccl; + int i; + + /* Basic plan: We fetch a character. If it's a backslash, + we set the backslash flag and go through the loop again. + On the plus side, this avoids having a duplicate of the + main switch inside the backslash case. On the minus side, + it means that just about every case begins with + "if (backslash) ...". */ + for (i = 0; i < 2; ++i) + { + FETCH(c, 0); + switch (c) + { + case '\\': + if (backslash) + goto normal_char; + if (lexleft == 0) + dfaerror("Unfinished \\ escape"); + backslash = 1; + break; + + case '^': + if (backslash) + goto normal_char; + if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS + || lasttok == END + || lasttok == LPAREN + || lasttok == OR) + return lasttok = BEGLINE; + goto normal_char; + + case '$': + if (backslash) + goto normal_char; + if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS + || lexleft == 0 + || (syntax_bits & RE_NO_BK_PARENS + ? lexleft > 0 && *lexptr == ')' + : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')') + || (syntax_bits & RE_NO_BK_VBAR + ? 
lexleft > 0 && *lexptr == '|' + : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|') + || ((syntax_bits & RE_NEWLINE_ALT) + && lexleft > 0 && *lexptr == '\n')) + return lasttok = ENDLINE; + goto normal_char; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (backslash && !(syntax_bits & RE_NO_BK_REFS)) + { + laststart = 0; + return lasttok = BACKREF; + } + goto normal_char; + + case '`': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = BEGLINE; /* FIXME: should be beginning of string */ + goto normal_char; + + case '\'': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = ENDLINE; /* FIXME: should be end of string */ + goto normal_char; + + case '<': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = BEGWORD; + goto normal_char; + + case '>': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = ENDWORD; + goto normal_char; + + case 'b': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = LIMWORD; + goto normal_char; + + case 'B': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = NOTLIMWORD; + goto normal_char; + + case '?': + if (syntax_bits & RE_LIMITED_OPS) + goto normal_char; + if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) + goto normal_char; + if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + goto normal_char; + return lasttok = QMARK; + + case '*': + if (backslash) + goto normal_char; + if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + goto normal_char; + return lasttok = STAR; + + case '+': + if (syntax_bits & RE_LIMITED_OPS) + goto normal_char; + if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) + goto normal_char; + if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + goto normal_char; + return lasttok = PLUS; + + case '{': + if (!(syntax_bits & RE_INTERVALS)) + goto normal_char; + if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) + goto 
normal_char; + minrep = maxrep = 0; + /* Cases: + {M} - exact count + {M,} - minimum count, maximum is infinity + {,M} - 0 through M + {M,N} - M through N */ + FETCH(c, "unfinished repeat count"); + if (ISDIGIT(c)) + { + minrep = c - '0'; + for (;;) + { + FETCH(c, "unfinished repeat count"); + if (!ISDIGIT(c)) + break; + minrep = 10 * minrep + c - '0'; + } + } + else if (c != ',') + dfaerror("malformed repeat count"); + if (c == ',') + for (;;) + { + FETCH(c, "unfinished repeat count"); + if (!ISDIGIT(c)) + break; + maxrep = 10 * maxrep + c - '0'; + } + else + maxrep = minrep; + if (!(syntax_bits & RE_NO_BK_BRACES)) + { + if (c != '\\') + dfaerror("malformed repeat count"); + FETCH(c, "unfinished repeat count"); + } + if (c != '}') + dfaerror("malformed repeat count"); + laststart = 0; + return lasttok = REPMN; + + case '|': + if (syntax_bits & RE_LIMITED_OPS) + goto normal_char; + if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0)) + goto normal_char; + laststart = 1; + return lasttok = OR; + + case '\n': + if (syntax_bits & RE_LIMITED_OPS + || backslash + || !(syntax_bits & RE_NEWLINE_ALT)) + goto normal_char; + laststart = 1; + return lasttok = OR; + + case '(': + if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) + goto normal_char; + ++parens; + laststart = 1; + return lasttok = LPAREN; + + case ')': + if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) + goto normal_char; + if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + --parens; + laststart = 0; + return lasttok = RPAREN; + + case '.': + if (backslash) + goto normal_char; + zeroset(ccl); + notset(ccl); + if (!(syntax_bits & RE_DOT_NEWLINE)) + clrbit('\n', ccl); + if (syntax_bits & RE_DOT_NOT_NULL) + clrbit('\0', ccl); + laststart = 0; + return lasttok = CSET + charclass_index(ccl); + + case 'w': + case 'W': + if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) + goto normal_char; + zeroset(ccl); + for (c2 = 0; c2 < NOTCHAR; ++c2) + if (ISALNUM(c2)) + 
setbit(c2, ccl); + setbit('_', ccl); + if (c == 'W') + notset(ccl); + laststart = 0; + return lasttok = CSET + charclass_index(ccl); + + case '[': + if (backslash) + goto normal_char; + zeroset(ccl); + FETCH(c, "Unbalanced ["); + if (c == '^') + { + FETCH(c, "Unbalanced ["); + invert = 1; + } + else + invert = 0; + do + { + /* Nobody ever said this had to be fast. :-) + Note that if we're looking at some other [:...:] + construct, we just treat it as a bunch of ordinary + characters. We can do this because we assume + regex has checked for syntax errors before + dfa is ever called. */ + if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) + for (c1 = 0; prednames[c1].name; ++c1) + if (looking_at(prednames[c1].name)) + { + int (*pred)() = prednames[c1].pred; + if (case_fold + && (pred == is_upper || pred == is_lower)) + pred = is_alpha; + + for (c2 = 0; c2 < NOTCHAR; ++c2) + if ((*pred)(c2)) + setbit(c2, ccl); + lexptr += strlen(prednames[c1].name); + lexleft -= strlen(prednames[c1].name); + FETCH(c1, "Unbalanced ["); + goto skip; + } + if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) + FETCH(c, "Unbalanced ["); + FETCH(c1, "Unbalanced ["); + if (c1 == '-') + { + FETCH(c2, "Unbalanced ["); + if (c2 == ']') + { + /* In the case [x-], the - is an ordinary hyphen, + which is left in c1, the lookahead character. 
*/ + --lexptr; + ++lexleft; + c2 = c; + } + else + { + if (c2 == '\\' + && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) + FETCH(c2, "Unbalanced ["); + FETCH(c1, "Unbalanced ["); + } + } + else + c2 = c; + while (c <= c2) + { + setbit(c, ccl); + if (case_fold) + if (ISUPPER(c)) + setbit(tolower(c), ccl); + else if (ISLOWER(c)) + setbit(toupper(c), ccl); + ++c; + } + skip: + ; + } + while ((c = c1) != ']'); + if (invert) + { + notset(ccl); + if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) + clrbit('\n', ccl); + } + laststart = 0; + return lasttok = CSET + charclass_index(ccl); + + default: + normal_char: + laststart = 0; + if (case_fold && ISALPHA(c)) + { + zeroset(ccl); + setbit(c, ccl); + if (isupper(c)) + setbit(tolower(c), ccl); + else + setbit(toupper(c), ccl); + return lasttok = CSET + charclass_index(ccl); + } + return c; + } + } + + /* The above loop should consume at most a backslash + and some other character. */ + abort(); + return END; /* keeps pedantic compilers happy. */ +} + +/* Recursive descent parser for regular expressions. */ + +static token tok; /* Lookahead token. */ +static int depth; /* Current depth of a hypothetical stack + holding deferred productions. This is + used to determine the depth that will be + required of the real stack later on in + dfaanalyze(). */ + +/* Add the given token to the parse tree, maintaining the depth count and + updating the maximum depth if necessary. */ +static void +addtok(t) + token t; +{ + REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); + dfa->tokens[dfa->tindex++] = t; + + switch (t) + { + case QMARK: + case STAR: + case PLUS: + break; + + case CAT: + case OR: + case ORTOP: + --depth; + break; + + default: + ++dfa->nleaves; + case EMPTY: + ++depth; + break; + } + if (depth > dfa->depth) + dfa->depth = depth; +} + +/* The grammar understood by the parser is as follows. 
+ + regexp: + regexp OR branch + branch + + branch: + branch closure + closure + + closure: + closure QMARK + closure STAR + closure PLUS + atom + + atom: + + CSET + BACKREF + BEGLINE + ENDLINE + BEGWORD + ENDWORD + LIMWORD + NOTLIMWORD + + + The parser builds a parse tree in postfix form in an array of tokens. */ + +static void +atom() +{ + if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF + || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD + || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) + { + addtok(tok); + tok = lex(); + } + else if (tok == LPAREN) + { + tok = lex(); + regexp(0); + if (tok != RPAREN) + dfaerror("Unbalanced ("); + tok = lex(); + } + else + addtok(EMPTY); +} + +/* Return the number of tokens in the given subexpression. */ +static int +nsubtoks(tindex) +int tindex; +{ + int ntoks1; + + switch (dfa->tokens[tindex - 1]) + { + default: + return 1; + case QMARK: + case STAR: + case PLUS: + return 1 + nsubtoks(tindex - 1); + case CAT: + case OR: + case ORTOP: + ntoks1 = nsubtoks(tindex - 1); + return 1 + ntoks1 + nsubtoks(tindex - 1 - ntoks1); + } +} + +/* Copy the given subexpression to the top of the tree. 
*/ +static void +copytoks(tindex, ntokens) + int tindex, ntokens; +{ + int i; + + for (i = 0; i < ntokens; ++i) + addtok(dfa->tokens[tindex + i]); +} + +static void +closure() +{ + int tindex, ntokens, i; + + atom(); + while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN) + if (tok == REPMN) + { + ntokens = nsubtoks(dfa->tindex); + tindex = dfa->tindex - ntokens; + if (maxrep == 0) + addtok(PLUS); + if (minrep == 0) + addtok(QMARK); + for (i = 1; i < minrep; ++i) + { + copytoks(tindex, ntokens); + addtok(CAT); + } + for (; i < maxrep; ++i) + { + copytoks(tindex, ntokens); + addtok(QMARK); + addtok(CAT); + } + tok = lex(); + } + else + { + addtok(tok); + tok = lex(); + } +} + +static void +branch() +{ + closure(); + while (tok != RPAREN && tok != OR && tok >= 0) + { + closure(); + addtok(CAT); + } +} + +static void +regexp(toplevel) + int toplevel; +{ + branch(); + while (tok == OR) + { + tok = lex(); + branch(); + if (toplevel) + addtok(ORTOP); + else + addtok(OR); + } +} + +/* Main entry point for the parser. S is a string to be parsed, len is the + length of the string, so s can include NUL characters. D is a pointer to + the struct dfa to parse into. */ +void +dfaparse(s, len, d) + char *s; + size_t len; + struct dfa *d; + +{ + dfa = d; + lexstart = lexptr = s; + lexleft = len; + lasttok = END; + laststart = 1; + parens = 0; + + if (! syntax_bits_set) + dfaerror("No syntax specified"); + + tok = lex(); + depth = d->depth; + + regexp(1); + + if (tok != END) + dfaerror("Unbalanced )"); + + addtok(END - d->nregexps); + addtok(CAT); + + if (d->nregexps) + addtok(ORTOP); + + ++d->nregexps; +} + +/* Some primitives for operating on sets of positions. */ + +/* Copy one set to another; the destination must be large enough. */ +static void +copy(src, dst) + position_set *src; + position_set *dst; +{ + int i; + + for (i = 0; i < src->nelem; ++i) + dst->elems[i] = src->elems[i]; + dst->nelem = src->nelem; +} + +/* Insert a position in a set. 
Position sets are maintained in sorted + order according to index. If position already exists in the set with + the same index then their constraints are logically or'd together. + S->elems must point to an array large enough to hold the resulting set. */ +static void +insert(p, s) + position p; + position_set *s; +{ + int i; + position t1, t2; + + for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i) + continue; + if (i < s->nelem && p.index == s->elems[i].index) + s->elems[i].constraint |= p.constraint; + else + { + t1 = p; + ++s->nelem; + while (i < s->nelem) + { + t2 = s->elems[i]; + s->elems[i++] = t1; + t1 = t2; + } + } +} + +/* Merge two sets of positions into a third. The result is exactly as if + the positions of both sets were inserted into an initially empty set. */ +static void +merge(s1, s2, m) + position_set *s1; + position_set *s2; + position_set *m; +{ + int i = 0, j = 0; + + m->nelem = 0; + while (i < s1->nelem && j < s2->nelem) + if (s1->elems[i].index > s2->elems[j].index) + m->elems[m->nelem++] = s1->elems[i++]; + else if (s1->elems[i].index < s2->elems[j].index) + m->elems[m->nelem++] = s2->elems[j++]; + else + { + m->elems[m->nelem] = s1->elems[i++]; + m->elems[m->nelem++].constraint |= s2->elems[j++].constraint; + } + while (i < s1->nelem) + m->elems[m->nelem++] = s1->elems[i++]; + while (j < s2->nelem) + m->elems[m->nelem++] = s2->elems[j++]; +} + +/* Delete a position from a set. */ +static void +delete(p, s) + position p; + position_set *s; +{ + int i; + + for (i = 0; i < s->nelem; ++i) + if (p.index == s->elems[i].index) + break; + if (i < s->nelem) + for (--s->nelem; i < s->nelem; ++i) + s->elems[i] = s->elems[i + 1]; +} + +/* Find the index of the state corresponding to the given position set with + the given preceding context, or create a new state if there is no such + state. Newline and letter tell whether we got here on a newline or + letter, respectively. 
*/ +static int +state_index(d, s, newline, letter) + struct dfa *d; + position_set *s; + int newline; + int letter; +{ + int hash = 0; + int constraint; + int i, j; + + newline = newline ? 1 : 0; + letter = letter ? 1 : 0; + + for (i = 0; i < s->nelem; ++i) + hash ^= s->elems[i].index + s->elems[i].constraint; + + /* Try to find a state that exactly matches the proposed one. */ + for (i = 0; i < d->sindex; ++i) + { + if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem + || newline != d->states[i].newline || letter != d->states[i].letter) + continue; + for (j = 0; j < s->nelem; ++j) + if (s->elems[j].constraint + != d->states[i].elems.elems[j].constraint + || s->elems[j].index != d->states[i].elems.elems[j].index) + break; + if (j == s->nelem) + return i; + } + + /* We'll have to create a new state. */ + REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex); + d->states[i].hash = hash; + MALLOC(d->states[i].elems.elems, position, s->nelem); + copy(s, &d->states[i].elems); + d->states[i].newline = newline; + d->states[i].letter = letter; + d->states[i].backref = 0; + d->states[i].constraint = 0; + d->states[i].first_end = 0; + for (j = 0; j < s->nelem; ++j) + if (d->tokens[s->elems[j].index] < 0) + { + constraint = s->elems[j].constraint; + if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0) + || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1) + || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0) + || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1)) + d->states[i].constraint |= constraint; + if (! d->states[i].first_end) + d->states[i].first_end = d->tokens[s->elems[j].index]; + } + else if (d->tokens[s->elems[j].index] == BACKREF) + { + d->states[i].constraint = NO_CONSTRAINT; + d->states[i].backref = 1; + } + + ++d->sindex; + + return i; +} + +/* Find the epsilon closure of a set of positions. 
If any position of the set + contains a symbol that matches the empty string in some context, replace + that position with the elements of its follow labeled with an appropriate + constraint. Repeat exhaustively until no funny positions are left. + S->elems must be large enough to hold the result. */ +static void epsclosure _RE_ARGS((position_set *s, struct dfa *d)); + +static void +epsclosure(s, d) + position_set *s; + struct dfa *d; +{ + int i, j; + int *visited; + position p, old; + + MALLOC(visited, int, d->tindex); + for (i = 0; i < d->tindex; ++i) + visited[i] = 0; + + for (i = 0; i < s->nelem; ++i) + if (d->tokens[s->elems[i].index] >= NOTCHAR + && d->tokens[s->elems[i].index] != BACKREF + && d->tokens[s->elems[i].index] < CSET) + { + old = s->elems[i]; + p.constraint = old.constraint; + delete(s->elems[i], s); + if (visited[old.index]) + { + --i; + continue; + } + visited[old.index] = 1; + switch (d->tokens[old.index]) + { + case BEGLINE: + p.constraint &= BEGLINE_CONSTRAINT; + break; + case ENDLINE: + p.constraint &= ENDLINE_CONSTRAINT; + break; + case BEGWORD: + p.constraint &= BEGWORD_CONSTRAINT; + break; + case ENDWORD: + p.constraint &= ENDWORD_CONSTRAINT; + break; + case LIMWORD: + p.constraint &= LIMWORD_CONSTRAINT; + break; + case NOTLIMWORD: + p.constraint &= NOTLIMWORD_CONSTRAINT; + break; + default: + break; + } + for (j = 0; j < d->follows[old.index].nelem; ++j) + { + p.index = d->follows[old.index].elems[j].index; + insert(p, s); + } + /* Force rescan to start at the beginning. */ + i = -1; + } + + free(visited); +} + +/* Perform bottom-up analysis on the parse tree, computing various functions. + Note that at this point, we're pretending constructs like \< are real + characters rather than constraints on what can follow them. + + Nullable: A node is nullable if it is at the root of a regexp that can + match the empty string. + * EMPTY leaves are nullable. + * No other leaf is nullable. + * A QMARK or STAR node is nullable. 
+ * A PLUS node is nullable if its argument is nullable. + * A CAT node is nullable if both its arguments are nullable. + * An OR node is nullable if either argument is nullable. + + Firstpos: The firstpos of a node is the set of positions (nonempty leaves) + that could correspond to the first character of a string matching the + regexp rooted at the given node. + * EMPTY leaves have empty firstpos. + * The firstpos of a nonempty leaf is that leaf itself. + * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its + argument. + * The firstpos of a CAT node is the firstpos of the left argument, union + the firstpos of the right if the left argument is nullable. + * The firstpos of an OR node is the union of firstpos of each argument. + + Lastpos: The lastpos of a node is the set of positions that could + correspond to the last character of a string matching the regexp at + the given node. + * EMPTY leaves have empty lastpos. + * The lastpos of a nonempty leaf is that leaf itself. + * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its + argument. + * The lastpos of a CAT node is the lastpos of its right argument, union + the lastpos of the left if the right argument is nullable. + * The lastpos of an OR node is the union of the lastpos of each argument. + + Follow: The follow of a position is the set of positions that could + correspond to the character following a character matching the node in + a string matching the regexp. At this point we consider special symbols + that match the empty string in some context to be just normal characters. + Later, if we find that a special symbol is in a follow set, we will + replace it with the elements of its follow, labeled with an appropriate + constraint. + * Every node in the firstpos of the argument of a STAR or PLUS node is in + the follow of every node in the lastpos. 
+ * Every node in the firstpos of the second argument of a CAT node is in + the follow of every node in the lastpos of the first argument. + + Because of the postfix representation of the parse tree, the depth-first + analysis is conveniently done by a linear scan with the aid of a stack. + Sets are stored as arrays of the elements, obeying a stack-like allocation + scheme; the number of elements in each set deeper in the stack can be + used to determine the address of a particular set's array. */ +void +dfaanalyze(d, searchflag) + struct dfa *d; + int searchflag; +{ + int *nullable; /* Nullable stack. */ + int *nfirstpos; /* Element count stack for firstpos sets. */ + position *firstpos; /* Array where firstpos elements are stored. */ + int *nlastpos; /* Element count stack for lastpos sets. */ + position *lastpos; /* Array where lastpos elements are stored. */ + int *nalloc; /* Sizes of arrays allocated to follow sets. */ + position_set tmp; /* Temporary set for merging sets. */ + position_set merged; /* Result of merging sets. */ + int wants_newline; /* True if some position wants newline info. 
*/ + int *o_nullable; + int *o_nfirst, *o_nlast; + position *o_firstpos, *o_lastpos; + int i, j; + position *pos; + +#ifdef DEBUG + fprintf(stderr, "dfaanalyze:\n"); + for (i = 0; i < d->tindex; ++i) + { + fprintf(stderr, " %d:", i); + prtok(d->tokens[i]); + } + putc('\n', stderr); +#endif + + d->searchflag = searchflag; + + MALLOC(nullable, int, d->depth); + o_nullable = nullable; + MALLOC(nfirstpos, int, d->depth); + o_nfirst = nfirstpos; + MALLOC(firstpos, position, d->nleaves); + o_firstpos = firstpos, firstpos += d->nleaves; + MALLOC(nlastpos, int, d->depth); + o_nlast = nlastpos; + MALLOC(lastpos, position, d->nleaves); + o_lastpos = lastpos, lastpos += d->nleaves; + MALLOC(nalloc, int, d->tindex); + for (i = 0; i < d->tindex; ++i) + nalloc[i] = 0; + MALLOC(merged.elems, position, d->nleaves); + + CALLOC(d->follows, position_set, d->tindex); + + for (i = 0; i < d->tindex; ++i) +#ifdef DEBUG + { /* Nonsyntactic #ifdef goo... */ +#endif + switch (d->tokens[i]) + { + case EMPTY: + /* The empty set is nullable. */ + *nullable++ = 1; + + /* The firstpos and lastpos of the empty leaf are both empty. */ + *nfirstpos++ = *nlastpos++ = 0; + break; + + case STAR: + case PLUS: + /* Every element in the firstpos of the argument is in the follow + of every element in the lastpos. */ + tmp.nelem = nfirstpos[-1]; + tmp.elems = firstpos; + pos = lastpos; + for (j = 0; j < nlastpos[-1]; ++j) + { + merge(&tmp, &d->follows[pos[j].index], &merged); + REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position, + nalloc[pos[j].index], merged.nelem - 1); + copy(&merged, &d->follows[pos[j].index]); + } + + case QMARK: + /* A QMARK or STAR node is automatically nullable. */ + if (d->tokens[i] != PLUS) + nullable[-1] = 1; + break; + + case CAT: + /* Every element in the firstpos of the second argument is in the + follow of every element in the lastpos of the first argument. 
*/ + tmp.nelem = nfirstpos[-1]; + tmp.elems = firstpos; + pos = lastpos + nlastpos[-1]; + for (j = 0; j < nlastpos[-2]; ++j) + { + merge(&tmp, &d->follows[pos[j].index], &merged); + REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position, + nalloc[pos[j].index], merged.nelem - 1); + copy(&merged, &d->follows[pos[j].index]); + } + + /* The firstpos of a CAT node is the firstpos of the first argument, + union that of the second argument if the first is nullable. */ + if (nullable[-2]) + nfirstpos[-2] += nfirstpos[-1]; + else + firstpos += nfirstpos[-1]; + --nfirstpos; + + /* The lastpos of a CAT node is the lastpos of the second argument, + union that of the first argument if the second is nullable. */ + if (nullable[-1]) + nlastpos[-2] += nlastpos[-1]; + else + { + pos = lastpos + nlastpos[-2]; + for (j = nlastpos[-1] - 1; j >= 0; --j) + pos[j] = lastpos[j]; + lastpos += nlastpos[-2]; + nlastpos[-2] = nlastpos[-1]; + } + --nlastpos; + + /* A CAT node is nullable if both arguments are nullable. */ + nullable[-2] = nullable[-1] && nullable[-2]; + --nullable; + break; + + case OR: + case ORTOP: + /* The firstpos is the union of the firstpos of each argument. */ + nfirstpos[-2] += nfirstpos[-1]; + --nfirstpos; + + /* The lastpos is the union of the lastpos of each argument. */ + nlastpos[-2] += nlastpos[-1]; + --nlastpos; + + /* An OR node is nullable if either argument is nullable. */ + nullable[-2] = nullable[-1] || nullable[-2]; + --nullable; + break; + + default: + /* Anything else is a nonempty position. (Note that special + constructs like \< are treated as nonempty strings here; + an "epsilon closure" effectively makes them nullable later. + Backreferences have to get a real position so we can detect + transitions on them later. But they are nullable. */ + *nullable++ = d->tokens[i] == BACKREF; + + /* This position is in its own firstpos and lastpos. 
*/ + *nfirstpos++ = *nlastpos++ = 1; + --firstpos, --lastpos; + firstpos->index = lastpos->index = i; + firstpos->constraint = lastpos->constraint = NO_CONSTRAINT; + + /* Allocate the follow set for this position. */ + nalloc[i] = 1; + MALLOC(d->follows[i].elems, position, nalloc[i]); + break; + } +#ifdef DEBUG + /* ... balance the above nonsyntactic #ifdef goo... */ + fprintf(stderr, "node %d:", i); + prtok(d->tokens[i]); + putc('\n', stderr); + fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n"); + fprintf(stderr, " firstpos:"); + for (j = nfirstpos[-1] - 1; j >= 0; --j) + { + fprintf(stderr, " %d:", firstpos[j].index); + prtok(d->tokens[firstpos[j].index]); + } + fprintf(stderr, "\n lastpos:"); + for (j = nlastpos[-1] - 1; j >= 0; --j) + { + fprintf(stderr, " %d:", lastpos[j].index); + prtok(d->tokens[lastpos[j].index]); + } + putc('\n', stderr); + } +#endif + + /* For each follow set that is the follow set of a real position, replace + it with its epsilon closure. */ + for (i = 0; i < d->tindex; ++i) + if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF + || d->tokens[i] >= CSET) + { +#ifdef DEBUG + fprintf(stderr, "follows(%d:", i); + prtok(d->tokens[i]); + fprintf(stderr, "):"); + for (j = d->follows[i].nelem - 1; j >= 0; --j) + { + fprintf(stderr, " %d:", d->follows[i].elems[j].index); + prtok(d->tokens[d->follows[i].elems[j].index]); + } + putc('\n', stderr); +#endif + copy(&d->follows[i], &merged); + epsclosure(&merged, d); + if (d->follows[i].nelem < merged.nelem) + REALLOC(d->follows[i].elems, position, merged.nelem); + copy(&merged, &d->follows[i]); + } + + /* Get the epsilon closure of the firstpos of the regexp. The result will + be the set of positions of state 0. */ + merged.nelem = 0; + for (i = 0; i < nfirstpos[-1]; ++i) + insert(firstpos[i], &merged); + epsclosure(&merged, d); + + /* Check if any of the positions of state 0 will want newline context. 
*/ + wants_newline = 0; + for (i = 0; i < merged.nelem; ++i) + if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint)) + wants_newline = 1; + + /* Build the initial state. */ + d->salloc = 1; + d->sindex = 0; + MALLOC(d->states, dfa_state, d->salloc); + state_index(d, &merged, wants_newline, 0); + + free(o_nullable); + free(o_nfirst); + free(o_firstpos); + free(o_nlast); + free(o_lastpos); + free(nalloc); + free(merged.elems); +} + +/* Find, for each character, the transition out of state s of d, and store + it in the appropriate slot of trans. + + We divide the positions of s into groups (positions can appear in more + than one group). Each group is labeled with a set of characters that + every position in the group matches (taking into account, if necessary, + preceding context information of s). For each group, find the union + of the its elements' follows. This set is the set of positions of the + new state. For each character in the group's label, set the transition + on this character to be to a state corresponding to the set's positions, + and its associated backward context information, if necessary. + + If we are building a searching matcher, we include the positions of state + 0 in every state. + + The collection of groups is constructed by building an equivalence-class + partition of the positions of s. + + For each position, find the set of characters C that it matches. Eliminate + any characters from C that fail on grounds of backward context. + + Search through the groups, looking for a group whose label L has nonempty + intersection with C. If L - C is nonempty, create a new group labeled + L - C and having the same positions as the current group, and set L to + the intersection of L and C. Insert the position in this group, set + C = C - L, and resume scanning. + + If after comparing with every group there are characters remaining in C, + create a new group labeled with the characters of C and insert this + position in that group. 
*/ +void +dfastate(s, d, trans) + int s; + struct dfa *d; + int trans[]; +{ + position_set grps[NOTCHAR]; /* As many as will ever be needed. */ + charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ + int ngrps = 0; /* Number of groups actually used. */ + position pos; /* Current position being considered. */ + charclass matches; /* Set of matching characters. */ + int matchesf; /* True if matches is nonempty. */ + charclass intersect; /* Intersection with some label set. */ + int intersectf; /* True if intersect is nonempty. */ + charclass leftovers; /* Stuff in the label that didn't match. */ + int leftoversf; /* True if leftovers is nonempty. */ + static charclass letters; /* Set of characters considered letters. */ + static charclass newline; /* Set of characters that aren't newline. */ + position_set follows; /* Union of the follows of some group. */ + position_set tmp; /* Temporary space for merging sets. */ + int state; /* New state. */ + int wants_newline; /* New state wants to know newline context. */ + int state_newline; /* New state on a newline transition. */ + int wants_letter; /* New state wants to know letter context. */ + int state_letter; /* New state on a letter transition. */ + static int initialized; /* Flag for static initialization. */ + int i, j, k; + + /* Initialize the set of letters, if necessary. */ + if (! initialized) + { + initialized = 1; + for (i = 0; i < NOTCHAR; ++i) + if (ISALNUM(i)) + setbit(i, letters); + setbit('\n', newline); + } + + zeroset(matches); + + for (i = 0; i < d->states[s].elems.nelem; ++i) + { + pos = d->states[s].elems.elems[i]; + if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR) + setbit(d->tokens[pos.index], matches); + else if (d->tokens[pos.index] >= CSET) + copyset(d->charclasses[d->tokens[pos.index] - CSET], matches); + else + continue; + + /* Some characters may need to be eliminated from matches because + they fail in the current context. 
*/ + if (pos.constraint != 0xFF) + { + if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, + d->states[s].newline, 1)) + clrbit('\n', matches); + if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, + d->states[s].newline, 0)) + for (j = 0; j < CHARCLASS_INTS; ++j) + matches[j] &= newline[j]; + if (! MATCHES_LETTER_CONTEXT(pos.constraint, + d->states[s].letter, 1)) + for (j = 0; j < CHARCLASS_INTS; ++j) + matches[j] &= ~letters[j]; + if (! MATCHES_LETTER_CONTEXT(pos.constraint, + d->states[s].letter, 0)) + for (j = 0; j < CHARCLASS_INTS; ++j) + matches[j] &= letters[j]; + + /* If there are no characters left, there's no point in going on. */ + for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j) + continue; + if (j == CHARCLASS_INTS) + continue; + } + + for (j = 0; j < ngrps; ++j) + { + /* If matches contains a single character only, and the current + group's label doesn't contain that character, go on to the + next group. */ + if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR + && !tstbit(d->tokens[pos.index], labels[j])) + continue; + + /* Check if this group's label has a nonempty intersection with + matches. */ + intersectf = 0; + for (k = 0; k < CHARCLASS_INTS; ++k) + (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0; + if (! intersectf) + continue; + + /* It does; now find the set differences both ways. */ + leftoversf = matchesf = 0; + for (k = 0; k < CHARCLASS_INTS; ++k) + { + /* Even an optimizing compiler can't know this for sure. */ + int match = matches[k], label = labels[j][k]; + + (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0; + (matches[k] = match & ~label) ? (matchesf = 1) : 0; + } + + /* If there were leftovers, create a new group labeled with them. */ + if (leftoversf) + { + copyset(leftovers, labels[ngrps]); + copyset(intersect, labels[j]); + MALLOC(grps[ngrps].elems, position, d->nleaves); + copy(&grps[j], &grps[ngrps]); + ++ngrps; + } + + /* Put the position in the current group. 
Note that there is no + reason to call insert() here. */ + grps[j].elems[grps[j].nelem++] = pos; + + /* If every character matching the current position has been + accounted for, we're done. */ + if (! matchesf) + break; + } + + /* If we've passed the last group, and there are still characters + unaccounted for, then we'll have to create a new group. */ + if (j == ngrps) + { + copyset(matches, labels[ngrps]); + zeroset(matches); + MALLOC(grps[ngrps].elems, position, d->nleaves); + grps[ngrps].nelem = 1; + grps[ngrps].elems[0] = pos; + ++ngrps; + } + } + + MALLOC(follows.elems, position, d->nleaves); + MALLOC(tmp.elems, position, d->nleaves); + + /* If we are a searching matcher, the default transition is to a state + containing the positions of state 0, otherwise the default transition + is to fail miserably. */ + if (d->searchflag) + { + wants_newline = 0; + wants_letter = 0; + for (i = 0; i < d->states[0].elems.nelem; ++i) + { + if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint)) + wants_newline = 1; + if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint)) + wants_letter = 1; + } + copy(&d->states[0].elems, &follows); + state = state_index(d, &follows, 0, 0); + if (wants_newline) + state_newline = state_index(d, &follows, 1, 0); + else + state_newline = state; + if (wants_letter) + state_letter = state_index(d, &follows, 0, 1); + else + state_letter = state; + for (i = 0; i < NOTCHAR; ++i) + if (i == '\n') + trans[i] = state_newline; + else if (ISALNUM(i)) + trans[i] = state_letter; + else + trans[i] = state; + } + else + for (i = 0; i < NOTCHAR; ++i) + trans[i] = -1; + + for (i = 0; i < ngrps; ++i) + { + follows.nelem = 0; + + /* Find the union of the follows of the positions of the group. + This is a hideously inefficient loop. Fix it someday. 
*/ + for (j = 0; j < grps[i].nelem; ++j) + for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k) + insert(d->follows[grps[i].elems[j].index].elems[k], &follows); + + /* If we are building a searching matcher, throw in the positions + of state 0 as well. */ + if (d->searchflag) + for (j = 0; j < d->states[0].elems.nelem; ++j) + insert(d->states[0].elems.elems[j], &follows); + + /* Find out if the new state will want any context information. */ + wants_newline = 0; + if (tstbit('\n', labels[i])) + for (j = 0; j < follows.nelem; ++j) + if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) + wants_newline = 1; + + wants_letter = 0; + for (j = 0; j < CHARCLASS_INTS; ++j) + if (labels[i][j] & letters[j]) + break; + if (j < CHARCLASS_INTS) + for (j = 0; j < follows.nelem; ++j) + if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint)) + wants_letter = 1; + + /* Find the state(s) corresponding to the union of the follows. */ + state = state_index(d, &follows, 0, 0); + if (wants_newline) + state_newline = state_index(d, &follows, 1, 0); + else + state_newline = state; + if (wants_letter) + state_letter = state_index(d, &follows, 0, 1); + else + state_letter = state; + + /* Set the transitions for each character in the current label. */ + for (j = 0; j < CHARCLASS_INTS; ++j) + for (k = 0; k < INTBITS; ++k) + if (labels[i][j] & 1 << k) + { + int c = j * INTBITS + k; + + if (c == '\n') + trans[c] = state_newline; + else if (ISALNUM(c)) + trans[c] = state_letter; + else if (c < NOTCHAR) + trans[c] = state; + } + } + + for (i = 0; i < ngrps; ++i) + free(grps[i].elems); + free(follows.elems); + free(tmp.elems); +} + +/* Some routines for manipulating a compiled dfa's transition tables. + Each state may or may not have a transition table; if it does, and it + is a non-accepting state, then d->trans[state] points to its table. + If it is an accepting state then d->fails[state] points to its table. + If it has no table at all, then d->trans[state] is NULL. 
+ TODO: Improve this comment, get rid of the unnecessary redundancy. */ + +static void +build_state(s, d) + int s; + struct dfa *d; +{ + int *trans; /* The new transition table. */ + int i; + + /* Set an upper limit on the number of transition tables that will ever + exist at once. 1024 is arbitrary. The idea is that the frequently + used transition tables will be quickly rebuilt, whereas the ones that + were only needed once or twice will be cleared away. */ + if (d->trcount >= 1024) + { + for (i = 0; i < d->tralloc; ++i) + if (d->trans[i]) + { + free((ptr_t) d->trans[i]); + d->trans[i] = NULL; + } + else if (d->fails[i]) + { + free((ptr_t) d->fails[i]); + d->fails[i] = NULL; + } + d->trcount = 0; + } + + ++d->trcount; + + /* Set up the success bits for this state. */ + d->success[s] = 0; + if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0, + s, *d)) + d->success[s] |= 4; + if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1, + s, *d)) + d->success[s] |= 2; + if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0, + s, *d)) + d->success[s] |= 1; + + MALLOC(trans, int, NOTCHAR); + dfastate(s, d, trans); + + /* Now go through the new transition table, and make sure that the trans + and fail arrays are allocated large enough to hold a pointer for the + largest state mentioned in the table. */ + for (i = 0; i < NOTCHAR; ++i) + if (trans[i] >= d->tralloc) + { + int oldalloc = d->tralloc; + + while (trans[i] >= d->tralloc) + d->tralloc *= 2; + REALLOC(d->realtrans, int *, d->tralloc + 1); + d->trans = d->realtrans + 1; + REALLOC(d->fails, int *, d->tralloc); + REALLOC(d->success, int, d->tralloc); + REALLOC(d->newlines, int, d->tralloc); + while (oldalloc < d->tralloc) + { + d->trans[oldalloc] = NULL; + d->fails[oldalloc++] = NULL; + } + } + + /* Keep the newline transition in a special place so we can use it as + a sentinel. 
*/ + d->newlines[s] = trans['\n']; + trans['\n'] = -1; + + if (ACCEPTING(s, *d)) + d->fails[s] = trans; + else + d->trans[s] = trans; +} + +static void +build_state_zero(d) + struct dfa *d; +{ + d->tralloc = 1; + d->trcount = 0; + CALLOC(d->realtrans, int *, d->tralloc + 1); + d->trans = d->realtrans + 1; + CALLOC(d->fails, int *, d->tralloc); + MALLOC(d->success, int, d->tralloc); + MALLOC(d->newlines, int, d->tralloc); + build_state(0, d); +} + +/* Search through a buffer looking for a match to the given struct dfa. + Find the first occurrence of a string matching the regexp in the buffer, + and the shortest possible version thereof. Return a pointer to the first + character after the match, or NULL if none is found. Begin points to + the beginning of the buffer, and end points to the first character after + its end. We store a newline in *end to act as a sentinel, so end had + better point somewhere valid. Newline is a flag indicating whether to + allow newlines to be in the matching string. If count is non- + NULL it points to a place we're supposed to increment every time we + see a newline. Finally, if backref is non-NULL it points to a place + where we're supposed to store a 1 if backreferencing happened and the + match needs to be verified by a backtracking matcher. Otherwise + we store a 0 in *backref. */ +char * +dfaexec(d, begin, end, newline, count, backref) + struct dfa *d; + char *begin; + char *end; + int newline; + int *count; + int *backref; +{ + register int s, s1, tmp; /* Current state. */ + register unsigned char *p; /* Current input character. */ + register int **trans, *t; /* Copy of d->trans so it can be optimized + into a register. */ + static int sbit[NOTCHAR]; /* Table for anding with d->success. */ + static int sbit_init; + + if (! sbit_init) + { + int i; + + sbit_init = 1; + for (i = 0; i < NOTCHAR; ++i) + if (i == '\n') + sbit[i] = 4; + else if (ISALNUM(i)) + sbit[i] = 2; + else + sbit[i] = 1; + } + + if (! 
d->tralloc) + build_state_zero(d); + + s = s1 = 0; + p = (unsigned char *) begin; + trans = d->trans; + *end = '\n'; + + for (;;) + { + /* The dreaded inner loop. */ + if ((t = trans[s]) != 0) + do + { + s1 = t[*p++]; + if (! (t = trans[s1])) + goto last_was_s; + s = t[*p++]; + } + while ((t = trans[s]) != 0); + goto last_was_s1; + last_was_s: + tmp = s, s = s1, s1 = tmp; + last_was_s1: + + if (s >= 0 && p <= (unsigned char *) end && d->fails[s]) + { + if (d->success[s] & sbit[*p]) + { + if (backref) + if (d->states[s].backref) + *backref = 1; + else + *backref = 0; + return (char *) p; + } + + s1 = s; + s = d->fails[s][*p++]; + continue; + } + + /* If the previous character was a newline, count it. */ + if (count && (char *) p <= end && p[-1] == '\n') + ++*count; + + /* Check if we've run off the end of the buffer. */ + if ((char *) p > end) + return NULL; + + if (s >= 0) + { + build_state(s, d); + trans = d->trans; + continue; + } + + if (p[-1] == '\n' && newline) + { + s = d->newlines[s1]; + continue; + } + + s = 0; + } +} + +/* Initialize the components of a dfa that the other routines don't + initialize for themselves. */ +void +dfainit(d) + struct dfa *d; +{ + d->calloc = 1; + MALLOC(d->charclasses, charclass, d->calloc); + d->cindex = 0; + + d->talloc = 1; + MALLOC(d->tokens, token, d->talloc); + d->tindex = d->depth = d->nleaves = d->nregexps = 0; + + d->searchflag = 0; + d->tralloc = 0; + + d->musts = 0; +} + +/* Parse and analyze a single string of the given length. */ +void +dfacomp(s, len, d, searchflag) + char *s; + size_t len; + struct dfa *d; + int searchflag; +{ + if (case_fold) /* dummy folding in service of dfamust() */ + { + char *lcopy; + int i; + + lcopy = malloc(len); + if (!lcopy) + dfaerror("out of memory"); + + /* This is a kludge. 
*/ + case_fold = 0; + for (i = 0; i < len; ++i) + if (ISUPPER(s[i])) + lcopy[i] = tolower(s[i]); + else + lcopy[i] = s[i]; + + dfainit(d); + dfaparse(lcopy, len, d); + free(lcopy); + dfamust(d); + d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0; + case_fold = 1; + dfaparse(s, len, d); + dfaanalyze(d, searchflag); + } + else + { + dfainit(d); + dfaparse(s, len, d); + dfamust(d); + dfaanalyze(d, searchflag); + } +} + +/* Free the storage held by the components of a dfa. */ +void +dfafree(d) + struct dfa *d; +{ + int i; + struct dfamust *dm, *ndm; + + free((ptr_t) d->charclasses); + free((ptr_t) d->tokens); + for (i = 0; i < d->sindex; ++i) + free((ptr_t) d->states[i].elems.elems); + free((ptr_t) d->states); + for (i = 0; i < d->tindex; ++i) + if (d->follows[i].elems) + free((ptr_t) d->follows[i].elems); + free((ptr_t) d->follows); + for (i = 0; i < d->tralloc; ++i) + if (d->trans[i]) + free((ptr_t) d->trans[i]); + else if (d->fails[i]) + free((ptr_t) d->fails[i]); + if (d->realtrans) free((ptr_t) d->realtrans); + if (d->fails) free((ptr_t) d->fails); + if (d->newlines) free((ptr_t) d->newlines); + if (d->success) free((ptr_t) d->success); + for (dm = d->musts; dm; dm = ndm) + { + ndm = dm->next; + free(dm->must); + free((ptr_t) dm); + } +} + +/* Having found the postfix representation of the regular expression, + try to find a long sequence of characters that must appear in any line + containing the r.e. + Finding a "longest" sequence is beyond the scope here; + we take an easy way out and hope for the best. + (Take "(ab|a)b"--please.) 
+ + We do a bottom-up calculation of sequences of characters that must appear + in matches of r.e.'s represented by trees rooted at the nodes of the postfix + representation: + sequences that must appear at the left of the match ("left") + sequences that must appear at the right of the match ("right") + lists of sequences that must appear somewhere in the match ("in") + sequences that must constitute the match ("is") + + When we get to the root of the tree, we use one of the longest of its + calculated "in" sequences as our answer. The sequence we find is returned in + d->must (where "d" is the single argument passed to "dfamust"); + the length of the sequence is returned in d->mustn. + + The sequences calculated for the various types of node (in pseudo ANSI c) + are shown below. "p" is the operand of unary operators (and the left-hand + operand of binary operators); "q" is the right-hand operand of binary + operators. + + "ZERO" means "a zero-length sequence" below. + + Type left right is in + ---- ---- ----- -- -- + char c # c # c # c # c + + CSET ZERO ZERO ZERO ZERO + + STAR ZERO ZERO ZERO ZERO + + QMARK ZERO ZERO ZERO ZERO + + PLUS p->left p->right ZERO p->in + + CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus + p->left : q->right : q->is!=ZERO) ? q->in plus + p->is##q->left p->right##q->is p->is##q->is : p->right##q->left + ZERO + + OR longest common longest common (do p->is and substrings common to + leading trailing q->is have same p->in and q->in + (sub)sequence (sub)sequence length and + of p->left of p->right content) ? + and q->left and q->right p->is : NULL + + If there's anything else we recognize in the tree, all four sequences get set + to zero-length sequences. If there's something we don't recognize in the tree, + we just return a zero-length sequence. + + Break ties in favor of infrequent letters (choosing 'zzz' in preference to + 'aaa')? + + And. . 
.is it here or someplace that we might ponder "optimizations" such as + egrep 'psi|epsilon' -> egrep 'psi' + egrep 'pepsi|epsilon' -> egrep 'epsi' + (Yes, we now find "epsi" as a "string + that must occur", but we might also + simplify the *entire* r.e. being sought) + grep '[c]' -> grep 'c' + grep '(ab|a)b' -> grep 'ab' + grep 'ab*' -> grep 'a' + grep 'a*b' -> grep 'b' + + There are several issues: + + Is optimization easy (enough)? + + Does optimization actually accomplish anything, + or is the automaton you get from "psi|epsilon" (for example) + the same as the one you get from "psi" (for example)? + + Are optimizable r.e.'s likely to be used in real-life situations + (something like 'ab*' is probably unlikely; something like is + 'psi|epsilon' is likelier)? */ + +static char * +icatalloc(old, new) + char *old; + char *new; +{ + char *result; + size_t oldsize, newsize; + + newsize = (new == NULL) ? 0 : strlen(new); + if (old == NULL) + oldsize = 0; + else if (newsize == 0) + return old; + else oldsize = strlen(old); + if (old == NULL) + result = (char *) malloc(newsize + 1); + else + result = (char *) realloc((void *) old, oldsize + newsize + 1); + if (result != NULL && new != NULL) + (void) strcpy(result + oldsize, new); + return result; +} + +static char * +icpyalloc(string) + char *string; +{ + return icatalloc((char *) NULL, string); +} + +static char * +istrstr(lookin, lookfor) + char *lookin; + char *lookfor; +{ + char *cp; + size_t len; + + len = strlen(lookfor); + for (cp = lookin; *cp != '\0'; ++cp) + if (strncmp(cp, lookfor, len) == 0) + return cp; + return NULL; +} + +static void +ifree(cp) + char *cp; +{ + if (cp != NULL) + free(cp); +} + +static void +freelist(cpp) + char **cpp; +{ + int i; + + if (cpp == NULL) + return; + for (i = 0; cpp[i] != NULL; ++i) + { + free(cpp[i]); + cpp[i] = NULL; + } +} + +static char ** +enlist(cpp, new, len) + char **cpp; + char *new; + size_t len; +{ + int i, j; + + if (cpp == NULL) + return NULL; + if ((new = 
icpyalloc(new)) == NULL) + { + freelist(cpp); + return NULL; + } + new[len] = '\0'; + /* Is there already something in the list that's new (or longer)? */ + for (i = 0; cpp[i] != NULL; ++i) + if (istrstr(cpp[i], new) != NULL) + { + free(new); + return cpp; + } + /* Eliminate any obsoleted strings. */ + j = 0; + while (cpp[j] != NULL) + if (istrstr(new, cpp[j]) == NULL) + ++j; + else + { + free(cpp[j]); + if (--i == j) + break; + cpp[j] = cpp[i]; + cpp[i] = NULL; + } + /* Add the new string. */ + cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp); + if (cpp == NULL) + return NULL; + cpp[i] = new; + cpp[i + 1] = NULL; + return cpp; +} + +/* Given pointers to two strings, return a pointer to an allocated + list of their distinct common substrings. Return NULL if something + seems wild. */ +static char ** +comsubs(left, right) + char *left; + char *right; +{ + char **cpp; + char *lcp; + char *rcp; + size_t i, len; + + if (left == NULL || right == NULL) + return NULL; + cpp = (char **) malloc(sizeof *cpp); + if (cpp == NULL) + return NULL; + cpp[0] = NULL; + for (lcp = left; *lcp != '\0'; ++lcp) + { + len = 0; + rcp = index(right, *lcp); + while (rcp != NULL) + { + for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) + continue; + if (i > len) + len = i; + rcp = index(rcp + 1, *lcp); + } + if (len == 0) + continue; + if ((cpp = enlist(cpp, lcp, len)) == NULL) + break; + } + return cpp; +} + +static char ** +addlists(old, new) +char **old; +char **new; +{ + int i; + + if (old == NULL || new == NULL) + return NULL; + for (i = 0; new[i] != NULL; ++i) + { + old = enlist(old, new[i], strlen(new[i])); + if (old == NULL) + break; + } + return old; +} + +/* Given two lists of substrings, return a new list giving substrings + common to both. 
*/ +static char ** +inboth(left, right) + char **left; + char **right; +{ + char **both; + char **temp; + int lnum, rnum; + + if (left == NULL || right == NULL) + return NULL; + both = (char **) malloc(sizeof *both); + if (both == NULL) + return NULL; + both[0] = NULL; + for (lnum = 0; left[lnum] != NULL; ++lnum) + { + for (rnum = 0; right[rnum] != NULL; ++rnum) + { + temp = comsubs(left[lnum], right[rnum]); + if (temp == NULL) + { + freelist(both); + return NULL; + } + both = addlists(both, temp); + freelist(temp); + free(temp); + if (both == NULL) + return NULL; + } + } + return both; +} + +typedef struct +{ + char **in; + char *left; + char *right; + char *is; +} must; + +static void +resetmust(mp) +must *mp; +{ + mp->left[0] = mp->right[0] = mp->is[0] = '\0'; + freelist(mp->in); +} + +static void +dfamust(dfa) +struct dfa *dfa; +{ + must *musts; + must *mp; + char *result; + int ri; + int i; + int exact; + token t; + static must must0; + struct dfamust *dm; + static char empty_string[] = ""; + + result = empty_string; + exact = 0; + musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts); + if (musts == NULL) + return; + mp = musts; + for (i = 0; i <= dfa->tindex; ++i) + mp[i] = must0; + for (i = 0; i <= dfa->tindex; ++i) + { + mp[i].in = (char **) malloc(sizeof *mp[i].in); + mp[i].left = malloc(2); + mp[i].right = malloc(2); + mp[i].is = malloc(2); + if (mp[i].in == NULL || mp[i].left == NULL || + mp[i].right == NULL || mp[i].is == NULL) + goto done; + mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0'; + mp[i].in[0] = NULL; + } +#ifdef DEBUG + fprintf(stderr, "dfamust:\n"); + for (i = 0; i < dfa->tindex; ++i) + { + fprintf(stderr, " %d:", i); + prtok(dfa->tokens[i]); + } + putc('\n', stderr); +#endif + for (ri = 0; ri < dfa->tindex; ++ri) + { + switch (t = dfa->tokens[ri]) + { + case LPAREN: + case RPAREN: + goto done; /* "cannot happen" */ + case EMPTY: + case BEGLINE: + case ENDLINE: + case BEGWORD: + case ENDWORD: + case LIMWORD: + case NOTLIMWORD: + 
case BACKREF: + resetmust(mp); + break; + case STAR: + case QMARK: + if (mp <= musts) + goto done; /* "cannot happen" */ + --mp; + resetmust(mp); + break; + case OR: + case ORTOP: + if (mp < &musts[2]) + goto done; /* "cannot happen" */ + { + char **new; + must *lmp; + must *rmp; + int j, ln, rn, n; + + rmp = --mp; + lmp = --mp; + /* Guaranteed to be. Unlikely, but. . . */ + if (strcmp(lmp->is, rmp->is) != 0) + lmp->is[0] = '\0'; + /* Left side--easy */ + i = 0; + while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) + ++i; + lmp->left[i] = '\0'; + /* Right side */ + ln = strlen(lmp->right); + rn = strlen(rmp->right); + n = ln; + if (n > rn) + n = rn; + for (i = 0; i < n; ++i) + if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1]) + break; + for (j = 0; j < i; ++j) + lmp->right[j] = lmp->right[(ln - i) + j]; + lmp->right[j] = '\0'; + new = inboth(lmp->in, rmp->in); + if (new == NULL) + goto done; + freelist(lmp->in); + free((char *) lmp->in); + lmp->in = new; + } + break; + case PLUS: + if (mp <= musts) + goto done; /* "cannot happen" */ + --mp; + mp->is[0] = '\0'; + break; + case END: + if (mp != &musts[1]) + goto done; /* "cannot happen" */ + for (i = 0; musts[0].in[i] != NULL; ++i) + if (strlen(musts[0].in[i]) > strlen(result)) + result = musts[0].in[i]; + if (strcmp(result, musts[0].is) == 0) + exact = 1; + goto done; + case CAT: + if (mp < &musts[2]) + goto done; /* "cannot happen" */ + { + must *lmp; + must *rmp; + + rmp = --mp; + lmp = --mp; + /* In. Everything in left, plus everything in + right, plus catenation of + left's right and right's left. 
*/ + lmp->in = addlists(lmp->in, rmp->in); + if (lmp->in == NULL) + goto done; + if (lmp->right[0] != '\0' && + rmp->left[0] != '\0') + { + char *tp; + + tp = icpyalloc(lmp->right); + if (tp == NULL) + goto done; + tp = icatalloc(tp, rmp->left); + if (tp == NULL) + goto done; + lmp->in = enlist(lmp->in, tp, + strlen(tp)); + free(tp); + if (lmp->in == NULL) + goto done; + } + /* Left-hand */ + if (lmp->is[0] != '\0') + { + lmp->left = icatalloc(lmp->left, + rmp->left); + if (lmp->left == NULL) + goto done; + } + /* Right-hand */ + if (rmp->is[0] == '\0') + lmp->right[0] = '\0'; + lmp->right = icatalloc(lmp->right, rmp->right); + if (lmp->right == NULL) + goto done; + /* Guaranteed to be */ + if (lmp->is[0] != '\0' && rmp->is[0] != '\0') + { + lmp->is = icatalloc(lmp->is, rmp->is); + if (lmp->is == NULL) + goto done; + } + else + lmp->is[0] = '\0'; + } + break; + default: + if (t < END) + { + /* "cannot happen" */ + goto done; + } + else if (t == '\0') + { + /* not on *my* shift */ + goto done; + } + else if (t >= CSET) + { + /* easy enough */ + resetmust(mp); + } + else + { + /* plain character */ + resetmust(mp); + mp->is[0] = mp->left[0] = mp->right[0] = t; + mp->is[1] = mp->left[1] = mp->right[1] = '\0'; + mp->in = enlist(mp->in, mp->is, (size_t)1); + if (mp->in == NULL) + goto done; + } + break; + } +#ifdef DEBUG + fprintf(stderr, " node: %d:", ri); + prtok(dfa->tokens[ri]); + fprintf(stderr, "\n in:"); + for (i = 0; mp->in[i]; ++i) + fprintf(stderr, " \"%s\"", mp->in[i]); + fprintf(stderr, "\n is: \"%s\"\n", mp->is); + fprintf(stderr, " left: \"%s\"\n", mp->left); + fprintf(stderr, " right: \"%s\"\n", mp->right); +#endif + ++mp; + } + done: + if (strlen(result)) + { + dm = (struct dfamust *) malloc(sizeof (struct dfamust)); + dm->exact = exact; + dm->must = malloc(strlen(result) + 1); + strcpy(dm->must, result); + dm->next = dfa->musts; + dfa->musts = dm; + } + mp = musts; + for (i = 0; i <= dfa->tindex; ++i) + { + freelist(mp[i].in); + ifree((char *) 
mp[i].in); + ifree(mp[i].left); + ifree(mp[i].right); + ifree(mp[i].is); + } + free((char *) mp); +} diff --git a/contrib/awk/dfa.h b/contrib/awk/dfa.h new file mode 100644 index 0000000..dda5181 --- /dev/null +++ b/contrib/awk/dfa.h @@ -0,0 +1,364 @@ +/* dfa.h - declarations for GNU deterministic regexp compiler + Copyright (C) 1988 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ + +/* Written June, 1988 by Mike Haertel */ + +/* FIXME: + 2. We should not export so much of the DFA internals. + In addition to clobbering modularity, we eat up valuable + name space. */ + +/* Number of bits in an unsigned char. */ +#ifndef CHARBITS +#define CHARBITS 8 +#endif + +/* First integer value that is greater than any character code. */ +#define NOTCHAR (1 << CHARBITS) + +/* INTBITS need not be exact, just a lower bound. */ +#ifndef INTBITS +#define INTBITS (CHARBITS * sizeof (int)) +#endif + +/* Number of ints required to hold a bit for every character. */ +#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS) + +/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ +typedef int charclass[CHARCLASS_INTS]; + +/* The regexp is parsed into an array of tokens in postfix form. Some tokens + are operators and others are terminal symbols. 
Most (but not all) of these
   codes are returned by the lexical analyzer. */

typedef enum
{
  END = -1,			/* END is a terminal symbol that matches the
				   end of input; any value of END or less in
				   the parse tree is such a symbol.  Accepting
				   states of the DFA are those that would have
				   a transition on END. */

  /* Ordinary character values are terminal symbols that match themselves. */

  EMPTY = NOTCHAR,		/* EMPTY is a terminal symbol that matches
				   the empty string. */

  BACKREF,			/* BACKREF is generated by \; it
				   is not completely handled.  If the scanner
				   detects a transition on backref, it returns
				   a kind of "semi-success" indicating that
				   the match will have to be verified with
				   a backtracking matcher. */

  BEGLINE,			/* BEGLINE is a terminal symbol that matches
				   the empty string if it is at the beginning
				   of a line. */

  ENDLINE,			/* ENDLINE is a terminal symbol that matches
				   the empty string if it is at the end of
				   a line. */

  BEGWORD,			/* BEGWORD is a terminal symbol that matches
				   the empty string if it is at the beginning
				   of a word. */

  ENDWORD,			/* ENDWORD is a terminal symbol that matches
				   the empty string if it is at the end of
				   a word. */

  LIMWORD,			/* LIMWORD is a terminal symbol that matches
				   the empty string if it is at the beginning
				   or the end of a word. */

  NOTLIMWORD,			/* NOTLIMWORD is a terminal symbol that
				   matches the empty string if it is not at
				   the beginning or end of a word. */

  QMARK,			/* QMARK is an operator of one argument that
				   matches zero or one occurrences of its
				   argument. */

  STAR,				/* STAR is an operator of one argument that
				   matches the Kleene closure (zero or more
				   occurrences) of its argument. */

  PLUS,				/* PLUS is an operator of one argument that
				   matches the positive closure (one or more
				   occurrences) of its argument. */

  REPMN,			/* REPMN is a lexical token corresponding
				   to the {m,n} construct.  REPMN never
				   appears in the compiled token vector. */

  CAT,				/* CAT is an operator of two arguments that
				   matches the concatenation of its
				   arguments.  CAT is never returned by the
				   lexical analyzer. */

  OR,				/* OR is an operator of two arguments that
				   matches either of its arguments. */

  ORTOP,			/* OR at the toplevel in the parse tree.
				   This is used for a boyer-moore heuristic. */

  LPAREN,			/* LPAREN never appears in the parse tree,
				   it is only a lexeme. */

  RPAREN,			/* RPAREN never appears in the parse tree. */

  CSET				/* CSET (and any value greater) is a
				   terminal symbol that matches any of a
				   class of characters. */
} token;

/* Sets are stored in an array in the compiled dfa; the index of the
   array corresponding to a given set token is given by SET_INDEX(t). */
#define SET_INDEX(t) ((t) - CSET)

/* Sometimes characters can only be matched depending on the surrounding
   context.  Such context decisions depend on what the previous character
   was, and the value of the current (lookahead) character.  Context
   dependent constraints are encoded as 8 bit integers.  Each bit that
   is set indicates that the constraint succeeds in the corresponding
   context.

   bit 7 - previous and current are newlines
   bit 6 - previous was newline, current isn't
   bit 5 - previous wasn't newline, current is
   bit 4 - neither previous nor current is a newline
   bit 3 - previous and current are word-constituents
   bit 2 - previous was word-constituent, current isn't
   bit 1 - previous wasn't word-constituent, current is
   bit 0 - neither previous nor current is word-constituent

   Word-constituent characters are those that satisfy isalnum().

   The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
   succeeds in a particular context.  Prevn is true if the previous character
   was a newline, currn is true if the lookahead character is a newline.
   Prevl and currl similarly depend upon whether the previous and current
   characters are word-constituent letters. */
#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
  ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
  ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
  (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
   && MATCHES_LETTER_CONTEXT(constraint, prevl, currl))

/* The following macros give information about what a constraint depends on. */
#define PREV_NEWLINE_DEPENDENT(constraint) \
  (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
#define PREV_LETTER_DEPENDENT(constraint) \
  (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))

/* Tokens that match the empty string subject to some constraint actually
   work by applying that constraint to determine what may follow them,
   taking into account what has gone before.  The following values are
   the constraints corresponding to the special tokens previously defined. */
#define NO_CONSTRAINT 0xff
#define BEGLINE_CONSTRAINT 0xcf
#define ENDLINE_CONSTRAINT 0xaf
#define BEGWORD_CONSTRAINT 0xf2
#define ENDWORD_CONSTRAINT 0xf4
#define LIMWORD_CONSTRAINT 0xf6
#define NOTLIMWORD_CONSTRAINT 0xf9

/* States of the recognizer correspond to sets of positions in the parse
   tree, together with the constraints under which they may be matched.
   So a position is encoded as an index into the parse tree together with
   a constraint. */
typedef struct
{
  unsigned index;		/* Index into the parse array. */
  unsigned constraint;		/* Constraint for matching this position. */
} position;

/* Sets of positions are stored as arrays. */
typedef struct
{
  position *elems;		/* Elements of this position set. */
  int nelem;			/* Number of elements in this set. */
} position_set;

/* A state of the dfa consists of a set of positions, some flags,
   and the token value of the lowest-numbered position of the state that
   contains an END token. */
typedef struct
{
  int hash;			/* Hash of the positions of this state. */
  position_set elems;		/* Positions this state could match. */
  char newline;			/* True if previous state matched newline. */
  char letter;			/* True if previous state matched a letter. */
  char backref;			/* True if this state matches a \. */
  unsigned char constraint;	/* Constraint for this state to accept. */
  int first_end;		/* Token value of the first END in elems. */
} dfa_state;

/* Element of a list of strings, at least one of which is known to
   appear in any R.E. matching the DFA. */
struct dfamust
{
  int exact;
  char *must;
  struct dfamust *next;
};

/* A compiled regular expression. */
struct dfa
{
  /* Stuff built by the scanner. */
  charclass *charclasses;	/* Array of character sets for CSET tokens. */
  int cindex;			/* Index for adding new charclasses. */
  int calloc;			/* Number of charclasses currently allocated. */

  /* Stuff built by the parser. */
  token *tokens;		/* Postfix parse array. */
  int tindex;			/* Index for adding new tokens. */
  int talloc;			/* Number of tokens currently allocated. */
  int depth;			/* Depth required of an evaluation stack
				   used for depth-first traversal of the
				   parse tree. */
  int nleaves;			/* Number of leaves on the parse tree. */
  int nregexps;			/* Count of parallel regexps being built
				   with dfaparse(). */

  /* Stuff owned by the state builder. */
  dfa_state *states;		/* States of the dfa. */
  int sindex;			/* Index for adding new states. */
  int salloc;			/* Number of states currently allocated. */

  /* Stuff built by the structure analyzer. */
  position_set *follows;	/* Array of follow sets, indexed by position
				   index.  The follow of a position is the set
				   of positions containing characters that
				   could conceivably follow a character
				   matching the given position in a string
				   matching the regexp.  Allocated to the
				   maximum possible position index. */
  int searchflag;		/* True if we are supposed to build a searching
				   as opposed to an exact matcher.  A searching
				   matcher finds the first and shortest string
				   matching a regexp anywhere in the buffer,
				   whereas an exact matcher finds the longest
				   string matching, but anchored to the
				   beginning of the buffer. */

  /* Stuff owned by the executor. */
  int tralloc;			/* Number of transition tables that have
				   slots so far. */
  int trcount;			/* Number of transition tables that have
				   actually been built. */
  int **trans;			/* Transition tables for states that can
				   never accept.  If the transitions for a
				   state have not yet been computed, or the
				   state could possibly accept, its entry in
				   this table is NULL. */
  int **realtrans;		/* Trans always points to realtrans + 1; this
				   is so trans[-1] can contain NULL. */
  int **fails;			/* Transition tables after failing to accept
				   on a state that potentially could do so. */
  int *success;			/* Table of acceptance conditions used in
				   dfaexec and computed in build_state. */
  int *newlines;		/* Transitions on newlines.  The entry for a
				   newline in any transition table is always
				   -1 so we can count lines without wasting
				   too many cycles.  The transition for a
				   newline is stored separately and handled
				   as a special case.  Newline is also used
				   as a sentinel at the end of the buffer. */
  struct dfamust *musts;	/* List of strings, at least one of which
				   is known to appear in any r.e. matching
				   the dfa. */
};

/* Some macros for user access to dfa internals. */

/* ACCEPTING returns true if s could possibly be an accepting state of r. */
#define ACCEPTING(s, r) ((r).states[s].constraint)

/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
   specified context. */
#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
  SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \
		      prevn, currn, prevl, currl)

/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
   regexps that a given state could accept.  Parallel regexps are numbered
   starting at 1. */
#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)

/* Entry points. */

#ifdef __STDC__

/* dfasyntax() takes two arguments; the first sets the syntax bits described
   earlier in this file, and the second sets the case-folding flag. */
extern void dfasyntax(reg_syntax_t, int);

/* Compile the given string of the given length into the given struct dfa.
   Final argument is a flag specifying whether to build a searching or an
   exact matcher. */
extern void dfacomp(char *, size_t, struct dfa *, int);

/* Execute the given struct dfa on the buffer of characters.  The
   first char * points to the beginning, and the second points to the
   first character after the end of the buffer, which must be a writable
   place so a sentinel end-of-buffer marker can be stored there.  The
   second-to-last argument is a flag telling whether to allow newlines to
   be part of a string matching the regexp.  The next-to-last argument,
   if non-NULL, points to a place to increment every time we see a
   newline.  The final argument, if non-NULL, points to a flag that will
   be set if further examination by a backtracking matcher is needed in
   order to verify backreferencing; otherwise the flag will be cleared.
   Returns NULL if no match is found, or a pointer to the first
   character after the first & shortest matching string in the buffer. */
extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);

/* Free the storage held by the components of a struct dfa. */
extern void dfafree(struct dfa *);

/* Entry points for people who know what they're doing. */

/* Initialize the components of a struct dfa. */
extern void dfainit(struct dfa *);

/* Incrementally parse a string of given length into a struct dfa. */
extern void dfaparse(char *, size_t, struct dfa *);

/* Analyze a parsed regexp; second argument tells whether to build a searching
   or an exact matcher. */
extern void dfaanalyze(struct dfa *, int);

/* Compute, for each possible character, the transitions out of a given
   state, storing them in an array of integers. */
extern void dfastate(int, struct dfa *, int []);

/* Error handling. */

/* dfaerror() is called by the regexp routines whenever an error occurs.  It
   takes a single argument, a NUL-terminated string describing the error.
   The default dfaerror() prints the error message to stderr and exits.
   The user can provide a different dfaerror() if so desired. */
extern void dfaerror(const char *);

#else /* ! __STDC__ */
extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
extern void dfaanalyze(), dfastate(), dfaerror();
extern char *dfaexec();
#endif /* ! __STDC__ */
diff --git a/contrib/awk/doc/ChangeLog b/contrib/awk/doc/ChangeLog
new file mode 100644
index 0000000..660436a
--- /dev/null
+++ b/contrib/awk/doc/ChangeLog
@@ -0,0 +1,91 @@
Thu May 15 12:49:08 1997  Arnold D. Robbins

	* Release 3.0.3: Release tar file made.

Fri Apr 18 07:55:47 1997  Arnold D. Robbins

	* BETA Release 3.0.34: Release tar file made.

Sun Apr 13 15:39:20 1997  Arnold D. Robbins

	* Makefile.in ($(infodir)/gawk.info): exit 0 in case install-info
	fails.

Thu Jan 2 23:17:53 1997  Fred Fish

	* Makefile.in (awkcard.tr): Use ':' chars to separate parts of
	sed command, since $(srcdir) may expand to something with '/'
	characters in it, which confuses sed terribly.
	* gawk.texi (Amiga Installation): Note change of configuration
	from "m68k-cbm-amigados" to "m68k-amigaos".  Point ftp users
	towards current ADE distribution and not obsolete Aminet
	"gcc" distribution.  Change "FreshFish" to "Geek Gadgets".
+ +Wed Dec 25 11:25:22 1996 Arnold D. Robbins + + * Release 3.0.2: Release tar file made. + +Wed Dec 25 11:17:32 1996 Arnold D. Robbins + + * Makefile.in ($(mandir)/igawk$(manext),$(mandir)/gawk$(manext)): + remove chmod command; let $(INSTALL_DATA) use -m. + +Tue Dec 17 22:38:28 1996 Arnold D. Robbins + + * Makefile.in (gawk.info,gawk.dvi,postscript): run makeinfo, TeX, + and/or troff against files in $(srcdir). Thanks to Ulrich Drepper. + ($(infodir)/gawk.info): use --info-dir to install-info, not + --infodir. + +Tue Dec 10 23:09:26 1996 Arnold D. Robbins + + * Release 3.0.1: Release tar file made. + +Mon Dec 9 12:48:54 1996 Arnold D. Robbins + + * no.colors: new file from Michal for old troffs. + * Makefile.in [AWKCARD]: changes to parameterize old/new troff. + +Sun Dec 1 15:04:56 1996 Arnold D. Robbins + + * texinfo.tex: Updated to version 2.193, from Karl Berry. + +Tue Nov 26 22:57:15 1996 Arnold D. Robbins + + * Makefile.in ($(infodir)/gawk.info): Change option in call + to `install-info' to `--info-dir' from `--infodir'. + +Mon Nov 4 13:30:39 1996 Arnold D. Robbins + + * Makefile.in: updates for reference card. + (ad.block, awkcard.in, cardfonts, colors, macros, setter.outline): + new files for reference card. + +Wed Oct 16 12:43:02 1996 Arnold D. Robbins + + * texinfo.tex: Updated to version 2.185, from texinfo-3.9 dist. + +Sun Aug 11 23:12:08 1996 Arnold D. Robbins + + * Makefile.in ($(infodir)/gawk.info): correct use of + $(INSTALL_DATA) and remove chmod command. + +Thu Jul 11 22:06:50 1996 Arnold D. Robbins + + * Makefile.in ($(mandir)/gawk.$(ext), $(mandir)/igawk.$(ext)): + made dependant on files in $(srcdir). + +Fri Mar 15 06:45:35 1996 Arnold D. Robbins + + * Makefile.in (clean): add `*~' to list of files to be removed. + +Thu Jan 25 23:40:15 1996 Arnold D. Robbins + + * Makefile.in (dvi): run texindex and tex an extra time. + This gets the cross references right. Sigh. + +Wed Jan 24 11:51:54 1996 Arnold D. 
Robbins + + * Makefile.in (maintainer-clean): + Depend on distclean, not the other way around. + Output warning message as per GNU standards. diff --git a/contrib/awk/doc/awk.1 b/contrib/awk/doc/awk.1 new file mode 100644 index 0000000..0568c16 --- /dev/null +++ b/contrib/awk/doc/awk.1 @@ -0,0 +1,2621 @@ +.ds PX \s-1POSIX\s+1 +.ds UX \s-1UNIX\s+1 +.ds AN \s-1ANSI\s+1 +.TH GAWK 1 "Dec 19 1996" "Free Software Foundation" "Utility Commands" +.SH NAME +gawk \- pattern scanning and processing language +.SH SYNOPSIS +.B gawk +[ POSIX or GNU style options ] +.B \-f +.I program-file +[ +.B \-\^\- +] file .\^.\^. +.br +.B gawk +[ POSIX or GNU style options ] +[ +.B \-\^\- +] +.I program-text +file .\^.\^. +.SH DESCRIPTION +.I Gawk +is the GNU Project's implementation of the AWK programming language. +It conforms to the definition of the language in +the \*(PX 1003.2 Command Language And Utilities Standard. +This version in turn is based on the description in +.IR "The AWK Programming Language" , +by Aho, Kernighan, and Weinberger, +with the additional features found in the System V Release 4 version +of \*(UX +.IR awk . +.I Gawk +also provides more recent Bell Labs +.I awk +extensions, and some GNU-specific extensions. +.PP +The command line consists of options to +.I gawk +itself, the AWK program text (if not supplied via the +.B \-f +or +.B \-\^\-file +options), and values to be made +available in the +.B ARGC +and +.B ARGV +pre-defined AWK variables. +.SH OPTION FORMAT +.PP +.I Gawk +options may be either the traditional \*(PX one letter options, +or the GNU style long options. \*(PX options start with a single ``\-'', +while long options start with ``\-\^\-''. +Long options are provided for both GNU-specific features and +for \*(PX mandated features. +.PP +Following the \*(PX standard, +.IR gawk -specific +options are supplied via arguments to the +.B \-W +option. 
Multiple +.B \-W +options may be supplied +Each +.B \-W +option has a corresponding long option, as detailed below. +Arguments to long options are either joined with the option +by an +.B = +sign, with no intervening spaces, or they may be provided in the +next command line argument. +Long options may be abbreviated, as long as the abbreviation +remains unique. +.SH OPTIONS +.PP +.I Gawk +accepts the following options. +.TP +.PD 0 +.BI \-F " fs" +.TP +.PD +.BI \-\^\-field-separator " fs" +Use +.I fs +for the input field separator (the value of the +.B FS +predefined +variable). +.TP +.PD 0 +\fB\-v\fI var\fB\^=\^\fIval\fR +.TP +.PD +\fB\-\^\-assign \fIvar\fB\^=\^\fIval\fR +Assign the value +.IR val , +to the variable +.IR var , +before execution of the program begins. +Such variable values are available to the +.B BEGIN +block of an AWK program. +.TP +.PD 0 +.BI \-f " program-file" +.TP +.PD +.BI \-\^\-file " program-file" +Read the AWK program source from the file +.IR program-file , +instead of from the first command line argument. +Multiple +.B \-f +(or +.BR \-\^\-file ) +options may be used. +.TP +.PD 0 +.BI \-mf " NNN" +.TP +.PD +.BI \-mr " NNN" +Set various memory limits to the value +.IR NNN . +The +.B f +flag sets the maximum number of fields, and the +.B r +flag sets the maximum record size. These two flags and the +.B \-m +option are from the Bell Labs research version of \*(UX +.IR awk . +They are ignored by +.IR gawk , +since +.I gawk +has no pre-defined limits. +.TP +.PD 0 +.B "\-W traditional" +.TP +.PD 0 +.B "\-W compat" +.TP +.PD 0 +.B \-\^\-traditional +.TP +.PD +.B \-\^\-compat +Run in +.I compatibility +mode. In compatibility mode, +.I gawk +behaves identically to \*(UX +.IR awk ; +none of the GNU-specific extensions are recognized. +The use of +.B \-\^\-traditional +is preferred over the other forms of this option. +See +.BR "GNU EXTENSIONS" , +below, for more information. 
+.TP +.PD 0 +.B "\-W copyleft" +.TP +.PD 0 +.B "\-W copyright" +.TP +.PD 0 +.B \-\^\-copyleft +.TP +.PD +.B \-\^\-copyright +Print the short version of the GNU copyright information message on +the standard output, and exits successfully. +.TP +.PD 0 +.B "\-W help" +.TP +.PD 0 +.B "\-W usage" +.TP +.PD 0 +.B \-\^\-help +.TP +.PD +.B \-\^\-usage +Print a relatively short summary of the available options on +the standard output. +(Per the +.IR "GNU Coding Standards" , +these options cause an immediate, successful exit.) +.TP +.PD 0 +.B "\-W lint" +.TP +.PD +.B \-\^\-lint +Provide warnings about constructs that are +dubious or non-portable to other AWK implementations. +.TP +.PD 0 +.B "\-W lint\-old" +.TP +.PD +.B \-\^\-lint\-old +Provide warnings about constructs that are +not portable to the original version of Unix +.IR awk . +.ig +.\" This option is left undocumented, on purpose. +.TP +.PD 0 +.B "\-W nostalgia" +.TP +.PD +.B \-\^\-nostalgia +Provide a moment of nostalgia for long time +.I awk +users. +.. +.TP +.PD 0 +.B "\-W posix" +.TP +.PD +.B \-\^\-posix +This turns on +.I compatibility +mode, with the following additional restrictions: +.RS +.TP \w'\(bu'u+1n +\(bu +.B \ex +escape sequences are not recognized. +.TP +\(bu +Only space and tab act as field separators when +.B FS +is set to a single space, newline does not. +.TP +\(bu +The synonym +.B func +for the keyword +.B function +is not recognized. +.TP +\(bu +The operators +.B ** +and +.B **= +cannot be used in place of +.B ^ +and +.BR ^= . +.TP +\(bu +The +.B fflush() +function is not available. +.RE +.TP +.PD 0 +.B "\-W re\-interval" +.TP +.PD +.B \-\^\-re\-interval +Enable the use of +.I "interval expressions" +in regular expression matching +(see +.BR "Regular Expressions" , +below). +Interval expressions were not traditionally available in the +AWK language. The POSIX standard added them, to make +.I awk +and +.I egrep +consistent with each other. 
+However, their use is likely +to break old AWK programs, so +.I gawk +only provides them if they are requested with this option, or when +.B \-\^\-posix +is specified. +.TP +.PD 0 +.BI "\-W source " program-text +.TP +.PD +.BI \-\^\-source " program-text" +Use +.I program-text +as AWK program source code. +This option allows the easy intermixing of library functions (used via the +.B \-f +and +.B \-\^\-file +options) with source code entered on the command line. +It is intended primarily for medium to large AWK programs used +in shell scripts. +.TP +.PD 0 +.B "\-W version" +.TP +.PD +.B \-\^\-version +Print version information for this particular copy of +.I gawk +on the standard output. +This is useful mainly for knowing if the current copy of +.I gawk +on your system +is up to date with respect to whatever the Free Software Foundation +is distributing. +This is also useful when reporting bugs. +(Per the +.IR "GNU Coding Standards" , +these options cause an immediate, successful exit.) +.TP +.B \-\^\- +Signal the end of options. This is useful to allow further arguments to the +AWK program itself to start with a ``\-''. +This is mainly for consistency with the argument parsing convention used +by most other \*(PX programs. +.PP +In compatibility mode, +any other options are flagged as illegal, but are otherwise ignored. +In normal operation, as long as program text has been supplied, unknown +options are passed on to the AWK program in the +.B ARGV +array for processing. This is particularly useful for running AWK +programs via the ``#!'' executable interpreter mechanism. +.SH AWK PROGRAM EXECUTION +.PP +An AWK program consists of a sequence of pattern-action statements +and optional function definitions. 
+.RS +.PP +\fIpattern\fB { \fIaction statements\fB }\fR +.br +\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR +.RE +.PP +.I Gawk +first reads the program source from the +.IR program-file (s) +if specified, +from arguments to +.BR \-\^\-source , +or from the first non-option argument on the command line. +The +.B \-f +and +.B \-\^\-source +options may be used multiple times on the command line. +.I Gawk +will read the program text as if all the +.IR program-file s +and command line source texts +had been concatenated together. This is useful for building libraries +of AWK functions, without having to include them in each new AWK +program that uses them. It also provides the ability to mix library +functions with command line programs. +.PP +The environment variable +.B AWKPATH +specifies a search path to use when finding source files named with +the +.B \-f +option. If this variable does not exist, the default path is +\fB".:/usr/local/share/awk"\fR. +(The actual directory may vary, depending upon how +.I gawk +was built and installed.) +If a file name given to the +.B \-f +option contains a ``/'' character, no path search is performed. +.PP +.I Gawk +executes AWK programs in the following order. +First, +all variable assignments specified via the +.B \-v +option are performed. +Next, +.I gawk +compiles the program into an internal form. +Then, +.I gawk +executes the code in the +.B BEGIN +block(s) (if any), +and then proceeds to read +each file named in the +.B ARGV +array. +If there are no files named on the command line, +.I gawk +reads the standard input. +.PP +If a filename on the command line has the form +.IB var = val +it is treated as a variable assignment. The variable +.I var +will be assigned the value +.IR val . +(This happens after any +.B BEGIN +block(s) have been run.) 
+Command line variable assignment +is most useful for dynamically assigning values to the variables +AWK uses to control how input is broken into fields and records. It +is also useful for controlling state if multiple passes are needed over +a single data file. +.PP +If the value of a particular element of +.B ARGV +is empty (\fB""\fR), +.I gawk +skips over it. +.PP +For each record in the input, +.I gawk +tests to see if it matches any +.I pattern +in the AWK program. +For each pattern that the record matches, the associated +.I action +is executed. +The patterns are tested in the order they occur in the program. +.PP +Finally, after all the input is exhausted, +.I gawk +executes the code in the +.B END +block(s) (if any). +.SH VARIABLES, RECORDS AND FIELDS +AWK variables are dynamic; they come into existence when they are +first used. Their values are either floating-point numbers or strings, +or both, +depending upon how they are used. AWK also has one dimensional +arrays; arrays with multiple dimensions may be simulated. +Several pre-defined variables are set as a program +runs; these will be described as needed and summarized below. +.SS Records +Normally, records are separated by newline characters. You can control how +records are separated by assigning values to the built-in variable +.BR RS . +If +.B RS +is any single character, that character separates records. +Otherwise, +.B RS +is a regular expression. Text in the input that matches this +regular expression will separate the record. +However, in compatibility mode, +only the first character of its string +value is used for separating records. +If +.B RS +is set to the null string, then records are separated by +blank lines. +When +.B RS +is set to the null string, the newline character always acts as +a field separator, in addition to whatever value +.B FS +may have. 
+.SS Fields +.PP +As each input record is read, +.I gawk +splits the record into +.IR fields , +using the value of the +.B FS +variable as the field separator. +If +.B FS +is a single character, fields are separated by that character. +If +.B FS +is the null string, then each individual character becomes a +separate field. +Otherwise, +.B FS +is expected to be a full regular expression. +In the special case that +.B FS +is a single space, fields are separated +by runs of spaces and/or tabs and/or newlines. +(But see the discussion of +.BR \-\-posix , +below). +Note that the value of +.B IGNORECASE +(see below) will also affect how fields are split when +.B FS +is a regular expression, and how records are separated when +.B RS +is a regular expression. +.PP +If the +.B FIELDWIDTHS +variable is set to a space separated list of numbers, each field is +expected to have fixed width, and +.I gawk +will split up the record using the specified widths. The value of +.B FS +is ignored. +Assigning a new value to +.B FS +overrides the use of +.BR FIELDWIDTHS , +and restores the default behavior. +.PP +Each field in the input record may be referenced by its position, +.BR $1 , +.BR $2 , +and so on. +.B $0 +is the whole record. The value of a field may be assigned to as well. +Fields need not be referenced by constants: +.RS +.PP +.ft B +n = 5 +.br +print $n +.ft R +.RE +.PP +prints the fifth field in the input record. +The variable +.B NF +is set to the total number of fields in the input record. +.PP +References to non-existent fields (i.e. fields after +.BR $NF ) +produce the null-string. However, assigning to a non-existent field +(e.g., +.BR "$(NF+2) = 5" ) +will increase the value of +.BR NF , +create any intervening fields with the null string as their value, and +cause the value of +.B $0 +to be recomputed, with the fields being separated by the value of +.BR OFS . +References to negative numbered fields cause a fatal error. 
+Decrementing +.B NF +causes the values of fields past the new value to be lost, and the value of +.B $0 +to be recomputed, with the fields being separated by the value of +.BR OFS . +.SS Built-in Variables +.PP +.IR Gawk 's +built-in variables are: +.PP +.TP \w'\fBFIELDWIDTHS\fR'u+1n +.B ARGC +The number of command line arguments (does not include options to +.IR gawk , +or the program source). +.TP +.B ARGIND +The index in +.B ARGV +of the current file being processed. +.TP +.B ARGV +Array of command line arguments. The array is indexed from +0 to +.B ARGC +\- 1. +Dynamically changing the contents of +.B ARGV +can control the files used for data. +.TP +.B CONVFMT +The conversion format for numbers, \fB"%.6g"\fR, by default. +.TP +.B ENVIRON +An array containing the values of the current environment. +The array is indexed by the environment variables, each element being +the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be +.BR /home/arnold ). +Changing this array does not affect the environment seen by programs which +.I gawk +spawns via redirection or the +.B system() +function. +(This may change in a future version of +.IR gawk .) +.\" but don't hold your breath... +.TP +.B ERRNO +If a system error occurs either doing a redirection for +.BR getline , +during a read for +.BR getline , +or during a +.BR close() , +then +.B ERRNO +will contain +a string describing the error. +.TP +.B FIELDWIDTHS +A white-space separated list of fieldwidths. When set, +.I gawk +parses the input into fields of fixed width, instead of using the +value of the +.B FS +variable as the field separator. +The fixed field width facility is still experimental; the +semantics may change as +.I gawk +evolves over time. +.TP +.B FILENAME +The name of the current input file. +If no files are specified on the command line, the value of +.B FILENAME +is ``\-''. +However, +.B FILENAME +is undefined inside the +.B BEGIN +block. 
+.TP +.B FNR +The input record number in the current input file. +.TP +.B FS +The input field separator, a space by default. See +.BR Fields , +above. +.TP +.B IGNORECASE +Controls the case-sensitivity of all regular expression +and string operations. If +.B IGNORECASE +has a non-zero value, then string comparisons and +pattern matching in rules, +field splitting with +.BR FS , +record separating with +.BR RS , +regular expression +matching with +.B ~ +and +.BR !~ , +and the +.BR gensub() , +.BR gsub() , +.BR index() , +.BR match() , +.BR split() , +and +.B sub() +pre-defined functions will all ignore case when doing regular expression +operations. Thus, if +.B IGNORECASE +is not equal to zero, +.B /aB/ +matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP, +and \fB"AB"\fP. +As with all AWK variables, the initial value of +.B IGNORECASE +is zero, so all regular expression and string +operations are normally case-sensitive. +Under Unix, the full ISO 8859-1 Latin-1 character set is used +when ignoring case. +.B NOTE: +In versions of +.I gawk +prior to 3.0, +.B IGNORECASE +only affected regular expression operations. It now affects string +comparisons as well. +.TP +.B NF +The number of fields in the current input record. +.TP +.B NR +The total number of input records seen so far. +.TP +.B OFMT +The output format for numbers, \fB"%.6g"\fR, by default. +.TP +.B OFS +The output field separator, a space by default. +.TP +.B ORS +The output record separator, by default a newline. +.TP +.B RS +The input record separator, by default a newline. +.TP +.B RT +The record terminator. +.I Gawk +sets +.B RT +to the input text that matched the character or regular expression +specified by +.BR RS . +.TP +.B RSTART +The index of the first character matched by +.BR match() ; +0 if no match. +.TP +.B RLENGTH +The length of the string matched by +.BR match() ; +\-1 if no match. 
+.TP +.B SUBSEP +The character used to separate multiple subscripts in array +elements, by default \fB"\e034"\fR. +.SS Arrays +.PP +Arrays are subscripted with an expression between square brackets +.RB ( [ " and " ] ). +If the expression is an expression list +.RI ( expr ", " expr " ...)" +then the array subscript is a string consisting of the +concatenation of the (string) value of each expression, +separated by the value of the +.B SUBSEP +variable. +This facility is used to simulate multiply dimensioned +arrays. For example: +.PP +.RS +.ft B +i = "A";\^ j = "B";\^ k = "C" +.br +x[i, j, k] = "hello, world\en" +.ft R +.RE +.PP +assigns the string \fB"hello, world\en"\fR to the element of the array +.B x +which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK +are associative, i.e. indexed by string values. +.PP +The special operator +.B in +may be used in an +.B if +or +.B while +statement to see if an array has an index consisting of a particular +value. +.PP +.RS +.ft B +.nf +if (val in array) + print array[val] +.fi +.ft +.RE +.PP +If the array has multiple subscripts, use +.BR "(i, j) in array" . +.PP +The +.B in +construct may also be used in a +.B for +loop to iterate over all the elements of an array. +.PP +An element may be deleted from an array using the +.B delete +statement. +The +.B delete +statement may also be used to delete the entire contents of an array, +just by specifying the array name without a subscript. +.SS Variable Typing And Conversion +.PP +Variables and fields +may be (floating point) numbers, or strings, or both. How the +value of a variable is interpreted depends upon its context. If used in +a numeric expression, it will be treated as a number, if used as a string +it will be treated as a string. +.PP +To force a variable to be treated as a number, add 0 to it; to force it +to be treated as a string, concatenate it with the null string. 
+.PP +When a string must be converted to a number, the conversion is accomplished +using +.IR atof (3). +A number is converted to a string by using the value of +.B CONVFMT +as a format string for +.IR sprintf (3), +with the numeric value of the variable as the argument. +However, even though all numbers in AWK are floating-point, +integral values are +.I always +converted as integers. Thus, given +.PP +.RS +.ft B +.nf +CONVFMT = "%2.2f" +a = 12 +b = a "" +.fi +.ft R +.RE +.PP +the variable +.B b +has a string value of \fB"12"\fR and not \fB"12.00"\fR. +.PP +.I Gawk +performs comparisons as follows: +If two variables are numeric, they are compared numerically. +If one value is numeric and the other has a string value that is a +``numeric string,'' then comparisons are also done numerically. +Otherwise, the numeric value is converted to a string and a string +comparison is performed. +Two strings are compared, of course, as strings. +According to the \*(PX standard, even if two strings are +numeric strings, a numeric comparison is performed. However, this is +clearly incorrect, and +.I gawk +does not do this. +.PP +Note that string constants, such as \fB"57"\fP, are +.I not +numeric strings, they are string constants. The idea of ``numeric string'' +only applies to fields, +.B getline +input, +.BR FILENAME , +.B ARGV +elements, +.B ENVIRON +elements and the elements of an array created by +.B split() +that are numeric strings. +The basic idea is that +.IR "user input" , +and only user input, that looks numeric, +should be treated that way. +.PP +Uninitialized variables have the numeric value 0 and the string value "" +(the null, or empty, string). +.SH PATTERNS AND ACTIONS +AWK is a line oriented language. The pattern comes first, and then the +action. Action statements are enclosed in +.B { +and +.BR } . +Either the pattern may be missing, or the action may be missing, but, +of course, not both. 
If the pattern is missing, the action will be +executed for every single record of input. +A missing action is equivalent to +.RS +.PP +.B "{ print }" +.RE +.PP +which prints the entire record. +.PP +Comments begin with the ``#'' character, and continue until the +end of the line. +Blank lines may be used to separate statements. +Normally, a statement ends with a newline, however, this is not the +case for lines ending in +a ``,'', +.BR { , +.BR ? , +.BR : , +.BR && , +or +.BR || . +Lines ending in +.B do +or +.B else +also have their statements automatically continued on the following line. +In other cases, a line can be continued by ending it with a ``\e'', +in which case the newline will be ignored. +.PP +Multiple statements may +be put on one line by separating them with a ``;''. +This applies to both the statements within the action part of a +pattern-action pair (the usual case), +and to the pattern-action statements themselves. +.SS Patterns +AWK patterns may be one of the following: +.PP +.RS +.nf +.B BEGIN +.B END +.BI / "regular expression" / +.I "relational expression" +.IB pattern " && " pattern +.IB pattern " || " pattern +.IB pattern " ? " pattern " : " pattern +.BI ( pattern ) +.BI ! " pattern" +.IB pattern1 ", " pattern2 +.fi +.RE +.PP +.B BEGIN +and +.B END +are two special kinds of patterns which are not tested against +the input. +The action parts of all +.B BEGIN +patterns are merged as if all the statements had +been written in a single +.B BEGIN +block. They are executed before any +of the input is read. Similarly, all the +.B END +blocks are merged, +and executed when all the input is exhausted (or when an +.B exit +statement is executed). +.B BEGIN +and +.B END +patterns cannot be combined with other patterns in pattern expressions. +.B BEGIN +and +.B END +patterns cannot have missing action parts. 
+.PP +For +.BI / "regular expression" / +patterns, the associated statement is executed for each input record that matches +the regular expression. +Regular expressions are the same as those in +.IR egrep (1), +and are summarized below. +.PP +A +.I "relational expression" +may use any of the operators defined below in the section on actions. +These generally test whether certain fields match certain regular expressions. +.PP +The +.BR && , +.BR || , +and +.B ! +operators are logical AND, logical OR, and logical NOT, respectively, as in C. +They do short-circuit evaluation, also as in C, and are used for combining +more primitive pattern expressions. As in most languages, parentheses +may be used to change the order of evaluation. +.PP +The +.B ?\^: +operator is like the same operator in C. If the first pattern is true +then the pattern used for testing is the second pattern, otherwise it is +the third. Only one of the second and third patterns is evaluated. +.PP +The +.IB pattern1 ", " pattern2 +form of an expression is called a +.IR "range pattern" . +It matches all input records starting with a record that matches +.IR pattern1 , +and continuing until a record that matches +.IR pattern2 , +inclusive. It does not combine with any other sort of pattern expression. +.SS Regular Expressions +Regular expressions are the extended kind found in +.IR egrep . +They are composed of characters as follows: +.TP \w'\fB[^\fIabc...\fB]\fR'u+2n +.I c +matches the non-metacharacter +.IR c . +.TP +.I \ec +matches the literal character +.IR c . +.TP +.B . +matches any character +.I including +newline. +.TP +.B ^ +matches the beginning of a string. +.TP +.B $ +matches the end of a string. +.TP +.BI [ abc... ] +character list, matches any of the characters +.IR abc... . +.TP +.BI [^ abc... ] +negated character list, matches any character except +.IR abc... . +.TP +.IB r1 | r2 +alternation: matches either +.I r1 +or +.IR r2 . 
+.TP +.I r1r2 +concatenation: matches +.IR r1 , +and then +.IR r2 . +.TP +.IB r + +matches one or more +.IR r 's. +.TP +.IB r * +matches zero or more +.IR r 's. +.TP +.IB r ? +matches zero or one +.IR r 's. +.TP +.BI ( r ) +grouping: matches +.IR r . +.TP +.PD 0 +.IB r { n } +.TP +.PD 0 +.IB r { n ,} +.TP +.PD +.IB r { n , m } +One or two numbers inside braces denote an +.IR "interval expression" . +If there is one number in the braces, the preceding regexp +.I r +is repeated +.I n +times. If there are two numbers separated by a comma, +.I r +is repeated +.I n +to +.I m +times. +If there is one number followed by a comma, then +.I r +is repeated at least +.I n +times. +.sp .5 +Interval expressions are only available if either +.B \-\^\-posix +or +.B \-\^\-re\-interval +is specified on the command line. +.TP +.B \ey +matches the empty string at either the beginning or the +end of a word. +.TP +.B \eB +matches the empty string within a word. +.TP +.B \e< +matches the empty string at the beginning of a word. +.TP +.B \e> +matches the empty string at the end of a word. +.TP +.B \ew +matches any word-constituent character (letter, digit, or underscore). +.TP +.B \eW +matches any character that is not word-constituent. +.TP +.B \e` +matches the empty string at the beginning of a buffer (string). +.TP +.B \e' +matches the empty string at the end of a buffer. +.PP +The escape sequences that are valid in string constants (see below) +are also legal in regular expressions. +.PP +.I "Character classes" +are a new feature introduced in the POSIX standard. +A character class is a special notation for describing +lists of characters that have a specific attribute, but where the +actual characters themselves can vary from country to country and/or +from character set to character set. For example, the notion of what +is an alphabetic character differs in the USA and in France. +.PP +A character class is only valid in a regexp +.I inside +the brackets of a character list. 
Character classes consist of +.BR [: , +a keyword denoting the class, and +.BR :] . +Here are the character +classes defined by the POSIX standard. +.TP +.B [:alnum:] +Alphanumeric characters. +.TP +.B [:alpha:] +Alphabetic characters. +.TP +.B [:blank:] +Space or tab characters. +.TP +.B [:cntrl:] +Control characters. +.TP +.B [:digit:] +Numeric characters. +.TP +.B [:graph:] +Characters that are both printable and visible. +(A space is printable, but not visible, while an +.B a +is both.) +.TP +.B [:lower:] +Lower-case alphabetic characters. +.TP +.B [:print:] +Printable characters (characters that are not control characters.) +.TP +.B [:punct:] +Punctuation characters (characters that are not letters, digits, +control characters, or space characters). +.TP +.B [:space:] +Space characters (such as space, tab, and formfeed, to name a few). +.TP +.B [:upper:] +Upper-case alphabetic characters. +.TP +.B [:xdigit:] +Characters that are hexadecimal digits. +.PP +For example, before the POSIX standard, to match alphanumeric +characters, you would have had to write +.BR /[A\-Za\-z0\-9]/ . +If your character set had other alphabetic characters in it, this would not +match them. With the POSIX character classes, you can write +.BR /[[:alnum:]]/ , +and this will match +.I all +the alphabetic and numeric characters in your character set. +.PP +Two additional special sequences can appear in character lists. +These apply to non-ASCII character sets, which can have single symbols +(called +.IR "collating elements" ) +that are represented with more than one +character, as well as several characters that are equivalent for +.IR collating , +or sorting, purposes. (E.g., in French, a plain ``e'' +and a grave-accented e\` are equivalent.) +.TP +Collating Symbols +A collating symbol is a multi-character collating element enclosed in +.B [. +and +.BR .] . 
+For example, if +.B ch +is a collating element, then +.B [[.ch.]] +is a regexp that matches this collating element, while +.B [ch] +is a regexp that matches either +.B c +or +.BR h . +.TP +Equivalence Classes +An equivalence class is a locale-specific name for a list of +characters that are equivalent. The name is enclosed in +.B [= +and +.BR =] . +For example, the name +.B e +might be used to represent all of +``e,'' ``e\','' and ``e\`.'' +In this case, +.B [[=e=]] +is a regexp +that matches any of +.BR e , +.BR e\' , +or +.BR e\` . +.PP +These features are very valuable in non-English speaking locales. +The library functions that +.I gawk +uses for regular expression matching +currently only recognize POSIX character classes; they do not recognize +collating symbols or equivalence classes. +.PP +The +.BR \ey , +.BR \eB , +.BR \e< , +.BR \e> , +.BR \ew , +.BR \eW , +.BR \e` , +and +.B \e' +operators are specific to +.IR gawk ; +they are extensions based on facilities in the GNU regexp libraries. +.PP +The various command line options +control how +.I gawk +interprets characters in regexps. +.TP +No options +In the default case, +.I gawk +provides all the facilities of +POSIX regexps and the GNU regexp operators described above. +However, interval expressions are not supported. +.TP +.B \-\^\-posix +Only POSIX regexps are supported, the GNU operators are not special. +(E.g., +.B \ew +matches a literal +.BR w ). +Interval expressions are allowed. +.TP +.B \-\^\-traditional +Traditional Unix +.I awk +regexps are matched. The GNU operators +are not special, interval expressions are not available, and neither +are the POSIX character classes +.RB ( [[:alnum:]] +and so on). +Characters described by octal and hexadecimal escape sequences are +treated literally, even if they represent regexp metacharacters. +.TP +.B \-\^\-re\-interval +Allow interval expressions in regexps, even if +.B \-\^\-traditional +has been provided. 
+.SS Actions +Action statements are enclosed in braces, +.B { +and +.BR } . +Action statements consist of the usual assignment, conditional, and looping +statements found in most languages. The operators, control statements, +and input/output statements +available are patterned after those in C. +.SS Operators +.PP +The operators in AWK, in order of decreasing precedence, are +.PP +.TP "\w'\fB*= /= %= ^=\fR'u+1n" +.BR ( \&... ) +Grouping +.TP +.B $ +Field reference. +.TP +.B "++ \-\^\-" +Increment and decrement, both prefix and postfix. +.TP +.B ^ +Exponentiation (\fB**\fR may also be used, and \fB**=\fR for +the assignment operator). +.TP +.B "+ \- !" +Unary plus, unary minus, and logical negation. +.TP +.B "* / %" +Multiplication, division, and modulus. +.TP +.B "+ \-" +Addition and subtraction. +.TP +.I space +String concatenation. +.TP +.PD 0 +.B "< >" +.TP +.PD 0 +.B "<= >=" +.TP +.PD +.B "!= ==" +The regular relational operators. +.TP +.B "~ !~" +Regular expression match, negated match. +.B NOTE: +Do not use a constant regular expression +.RB ( /foo/ ) +on the left-hand side of a +.B ~ +or +.BR !~ . +Only use one on the right-hand side. The expression +.BI "/foo/ ~ " exp +has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR. +This is usually +.I not +what was intended. +.TP +.B in +Array membership. +.TP +.B && +Logical AND. +.TP +.B || +Logical OR. +.TP +.B ?: +The C conditional expression. This has the form +.IB expr1 " ? " expr2 " : " expr3\c +\&. If +.I expr1 +is true, the value of the expression is +.IR expr2 , +otherwise it is +.IR expr3 . +Only one of +.I expr2 +and +.I expr3 +is evaluated. +.TP +.PD 0 +.B "= += \-=" +.TP +.PD +.B "*= /= %= ^=" +Assignment. Both absolute assignment +.BI ( var " = " value ) +and operator-assignment (the other forms) are supported. 
+.SS Control Statements +.PP +The control statements are +as follows: +.PP +.RS +.nf +\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR] +\fBwhile (\fIcondition\fB) \fIstatement \fR +\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR +\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR +\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR +\fBbreak\fR +\fBcontinue\fR +\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR +\fBdelete \fIarray\^\fR +\fBexit\fR [ \fIexpression\fR ] +\fB{ \fIstatements \fB} +.fi +.RE +.SS "I/O Statements" +.PP +The input/output statements are as follows: +.PP +.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n" +.BI close( file ) +Close file (or pipe, see below). +.TP +.B getline +Set +.B $0 +from next input record; set +.BR NF , +.BR NR , +.BR FNR . +.TP +.BI "getline <" file +Set +.B $0 +from next record of +.IR file ; +set +.BR NF . +.TP +.BI getline " var" +Set +.I var +from next input record; set +.BR NR , +.BR FNR . +.TP +.BI getline " var" " <" file +Set +.I var +from next record of +.IR file . +.TP +.B next +Stop processing the current input record. The next input record +is read and processing starts over with the first pattern in the +AWK program. If the end of the input data is reached, the +.B END +block(s), if any, are executed. +.TP +.B "nextfile" +Stop processing the current input file. The next input record read +comes from the next input file. +.B FILENAME +and +.B ARGIND +are updated, +.B FNR +is reset to 1, and processing starts over with the first pattern in the +AWK program. If the end of the input data is reached, the +.B END +block(s), if any, are executed. +.B NOTE: +Earlier versions of gawk used +.BR "next file" , +as two words. While this usage is still recognized, it generates a +warning message and will eventually be removed. +.TP +.B print +Prints the current record. +The output record is terminated with the value of the +.B ORS +variable. +.TP +.BI print " expr-list" +Prints expressions. 
+Each expression is separated by the value of the +.B OFS +variable. +The output record is terminated with the value of the +.B ORS +variable. +.TP +.BI print " expr-list" " >" file +Prints expressions on +.IR file . +Each expression is separated by the value of the +.B OFS +variable. The output record is terminated with the value of the +.B ORS +variable. +.TP +.BI printf " fmt, expr-list" +Format and print. +.TP +.BI printf " fmt, expr-list" " >" file +Format and print on +.IR file . +.TP +.BI system( cmd-line ) +Execute the command +.IR cmd-line , +and return the exit status. +(This may not be available on non-\*(PX systems.) +.TP +\&\fBfflush(\fR[\fIfile\^\fR]\fB)\fR +Flush any buffers associated with the open output file or pipe +.IR file . +If +.I file +is missing, then standard output is flushed. +If +.I file +is the null string, +then all open output files and pipes +have their buffers flushed. +.PP +Other input/output redirections are also allowed. For +.B print +and +.BR printf , +.BI >> file +appends output to the +.IR file , +while +.BI | " command" +writes on a pipe. +In a similar fashion, +.IB command " | getline" +pipes into +.BR getline . +The +.BR getline +command will return 0 on end of file, and \-1 on an error. +.SS The \fIprintf\fP\^ Statement +.PP +The AWK versions of the +.B printf +statement and +.B sprintf() +function +(see below) +accept the following conversion specification formats: +.TP +.B %c +An \s-1ASCII\s+1 character. +If the argument used for +.B %c +is numeric, it is treated as a character and printed. +Otherwise, the argument is assumed to be a string, and only the first +character of that string is printed. +.TP +.PD 0 +.B %d +.TP +.PD +.B %i +A decimal number (the integer part). +.TP +.PD 0 +.B %e +.TP +.PD +.B %E +A floating point number of the form +.BR [\-]d.dddddde[+\^\-]dd . +The +.B %E +format uses +.B E +instead of +.BR e . +.TP +.B %f +A floating point number of the form +.BR [\-]ddd.dddddd . 
+.TP +.PD 0 +.B %g +.TP +.PD +.B %G +Use +.B %e +or +.B %f +conversion, whichever is shorter, with nonsignificant zeros suppressed. +The +.B %G +format uses +.B %E +instead of +.BR %e . +.TP +.B %o +An unsigned octal number (again, an integer). +.TP +.B %s +A character string. +.TP +.PD 0 +.B %x +.TP +.PD +.B %X +An unsigned hexadecimal number (an integer). +The +.B %X +format uses +.B ABCDEF +instead of +.BR abcdef . +.TP +.B %% +A single +.B % +character; no argument is converted. +.PP +There are optional, additional parameters that may lie between the +.B % +and the control letter: +.TP +.B \- +The expression should be left-justified within its field. +.TP +.I space +For numeric conversions, prefix positive values with a space, and +negative values with a minus sign. +.TP +.B + +The plus sign, used before the width modifier (see below), +says to always supply a sign for numeric conversions, even if the data +to be formatted is positive. The +.B + +overrides the space modifier. +.TP +.B # +Use an ``alternate form'' for certain control letters. +For +.BR %o , +supply a leading zero. +For +.BR %x , +and +.BR %X , +supply a leading +.BR 0x +or +.BR 0X +for +a nonzero result. +For +.BR %e , +.BR %E , +and +.BR %f , +the result will always contain a +decimal point. +For +.BR %g , +and +.BR %G , +trailing zeros are not removed from the result. +.TP +.B 0 +A leading +.B 0 +(zero) acts as a flag, that indicates output should be +padded with zeroes instead of spaces. +This applies even to non-numeric output formats. +This flag only has an effect when the field width is wider than the +value to be printed. +.TP +.I width +The field should be padded to this width. The field is normally padded +with spaces. If the +.B 0 +flag has been used, it is padded with zeroes. +.TP +.BI \&. prec +A number that specifies the precision to use when printing. 
+For the +.BR %e , +.BR %E , +and +.BR %f +formats, this specifies the +number of digits you want printed to the right of the decimal point. +For the +.BR %g , +and +.B %G +formats, it specifies the maximum number +of significant digits. For the +.BR %d , +.BR %o , +.BR %i , +.BR %u , +.BR %x , +and +.B %X +formats, it specifies the minimum number of +digits to print. For a string, it specifies the maximum number of +characters from the string that should be printed. +.PP +The dynamic +.I width +and +.I prec +capabilities of the \*(AN C +.B printf() +routines are supported. +A +.B * +in place of either the +.B width +or +.B prec +specifications will cause their values to be taken from +the argument list to +.B printf +or +.BR sprintf() . +.SS Special File Names +.PP +When doing I/O redirection from either +.B print +or +.B printf +into a file, +or via +.B getline +from a file, +.I gawk +recognizes certain special filenames internally. These filenames +allow access to open file descriptors inherited from +.IR gawk 's +parent process (usually the shell). +Other special filenames provide access to information about the running +.B gawk +process. +The filenames are: +.TP \w'\fB/dev/stdout\fR'u+1n +.B /dev/pid +Reading this file returns the process ID of the current process, +in decimal, terminated with a newline. +.TP +.B /dev/ppid +Reading this file returns the parent process ID of the current process, +in decimal, terminated with a newline. +.TP +.B /dev/pgrpid +Reading this file returns the process group ID of the current process, +in decimal, terminated with a newline. +.TP +.B /dev/user +Reading this file returns a single record terminated with a newline. +The fields are separated with spaces. +.B $1 +is the value of the +.IR getuid (2) +system call, +.B $2 +is the value of the +.IR geteuid (2) +system call, +.B $3 +is the value of the +.IR getgid (2) +system call, and +.B $4 +is the value of the +.IR getegid (2) +system call. 
+If there are any additional fields, they are the group IDs returned by +.IR getgroups (2). +Multiple groups may not be supported on all systems. +.TP +.B /dev/stdin +The standard input. +.TP +.B /dev/stdout +The standard output. +.TP +.B /dev/stderr +The standard error output. +.TP +.BI /dev/fd/\^ n +The file associated with the open file descriptor +.IR n . +.PP +These are particularly useful for error messages. For example: +.PP +.RS +.ft B +print "You blew it!" > "/dev/stderr" +.ft R +.RE +.PP +whereas you would otherwise have to use +.PP +.RS +.ft B +print "You blew it!" | "cat 1>&2" +.ft R +.RE +.PP +These file names may also be used on the command line to name data files. +.SS Numeric Functions +.PP +AWK has the following pre-defined arithmetic functions: +.PP +.TP \w'\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR'u+1n +.BI atan2( y , " x" ) +returns the arctangent of +.I y/x +in radians. +.TP +.BI cos( expr ) +returns the cosine of +.IR expr , +which is in radians. +.TP +.BI exp( expr ) +the exponential function. +.TP +.BI int( expr ) +truncates to integer. +.TP +.BI log( expr ) +the natural logarithm function. +.TP +.B rand() +returns a random number between 0 and 1. +.TP +.BI sin( expr ) +returns the sine of +.IR expr , +which is in radians. +.TP +.BI sqrt( expr ) +the square root function. +.TP +\&\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR +uses +.I expr +as a new seed for the random number generator. If no +.I expr +is provided, the time of day will be used. +The return value is the previous seed for the random +number generator. +.SS String Functions +.PP +.I Gawk +has the following pre-defined string functions: +.PP +.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n" +\fBgensub(\fIr\fB, \fIs\fB, \fIh \fR[\fB, \fIt\fR]\fB)\fR +search the target string +.I t +for matches of the regular expression +.IR r . +If +.I h +is a string beginning with +.B g +or +.BR G , +then replace all matches of +.I r +with +.IR s . 
+Otherwise, +.I h +is a number indicating which match of +.I r +to replace. +If no +.I t +is supplied, +.B $0 +is used instead. +Within the replacement text +.IR s , +the sequence +.BI \e n\fR, +where +.I n +is a digit from 1 to 9, may be used to indicate just the text that +matched the +.IR n 'th +parenthesized subexpression. The sequence +.B \e0 +represents the entire matched text, as does the character +.BR & . +Unlike +.B sub() +and +.BR gsub() , +the modified string is returned as the result of the function, +and the original target string is +.I not +changed. +.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n" +\fBgsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR +for each substring matching the regular expression +.I r +in the string +.IR t , +substitute the string +.IR s , +and return the number of substitutions. +If +.I t +is not supplied, use +.BR $0 . +An +.B & +in the replacement text is replaced with the text that was actually matched. +Use +.B \e& +to get a literal +.BR & . +See +.I "AWK Language Programming" +for a fuller discussion of the rules for +.BR &'s +and backslashes in the replacement text of +.BR sub() , +.BR gsub() , +and +.BR gensub() . +.TP +.BI index( s , " t" ) +returns the index of the string +.I t +in the string +.IR s , +or 0 if +.I t +is not present. +.TP +\fBlength(\fR[\fIs\fR]\fB) +returns the length of the string +.IR s , +or the length of +.B $0 +if +.I s +is not supplied. +.TP +.BI match( s , " r" ) +returns the position in +.I s +where the regular expression +.I r +occurs, or 0 if +.I r +is not present, and sets the values of +.B RSTART +and +.BR RLENGTH . +.TP +\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR]\fB)\fR +splits the string +.I s +into the array +.I a +on the regular expression +.IR r , +and returns the number of fields. If +.I r +is omitted, +.B FS +is used instead. +The array +.I a +is cleared first. +Splitting behaves identically to field splitting, described above. 
+.TP
+.BI sprintf( fmt , " expr-list" )
+prints
+.I expr-list
+according to
+.IR fmt ,
+and returns the resulting string.
+.TP
+\fBsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR
+just like
+.BR gsub() ,
+but only the first matching substring is replaced.
+.TP
+\fBsubstr(\fIs\fB, \fIi \fR[\fB, \fIn\fR]\fB)\fR
+returns the at most
+.IR n -character
+substring of
+.I s
+starting at
+.IR i .
+If
+.I n
+is omitted, the rest of
+.I s
+is used.
+.TP
+.BI tolower( str )
+returns a copy of the string
+.IR str ,
+with all the upper-case characters in
+.I str
+translated to their corresponding lower-case counterparts.
+Non-alphabetic characters are left unchanged.
+.TP
+.BI toupper( str )
+returns a copy of the string
+.IR str ,
+with all the lower-case characters in
+.I str
+translated to their corresponding upper-case counterparts.
+Non-alphabetic characters are left unchanged.
+.SS Time Functions
+.PP
+Since one of the primary uses of AWK programs is processing log files
+that contain time stamp information,
+.I gawk
+provides the following two functions for obtaining time stamps and
+formatting them.
+.PP
+.TP "\w'\fBsystime()\fR'u+1n"
+.B systime()
+returns the current time of day as the number of seconds since the Epoch
+(Midnight UTC, January 1, 1970 on \*(PX systems).
+.TP
+\fBstrftime(\fR[\fIformat \fR[\fB, \fItimestamp\fR]]\fB)\fR
+formats
+.I timestamp
+according to the specification in
+.IR format .
+The
+.I timestamp
+should be of the same form as returned by
+.BR systime() .
+If
+.I timestamp
+is missing, the current time of day is used.
+If
+.I format
+is missing, a default format equivalent to the output of
+.IR date (1)
+will be used.
+See the specification for the
+.B strftime()
+function in \*(AN C for the format conversions that are
+guaranteed to be available.
+A public-domain version of
+.IR strftime (3)
+and a man page for it come with
+.IR gawk ;
+if that version was used to build
+.IR gawk ,
+then all of the conversions described in that man page are available to
+.IR gawk .
+.SS String Constants
+.PP
+String constants in AWK are sequences of characters enclosed
+between double quotes (\fB"\fR). Within strings, certain
+.I "escape sequences"
+are recognized, as in C. These are:
+.PP
+.TP \w'\fB\e\^\fIddd\fR'u+1n
+.B \e\e
+A literal backslash.
+.TP
+.B \ea
+The ``alert'' character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character.
+.TP
+.B \eb
+backspace.
+.TP
+.B \ef
+form-feed.
+.TP
+.B \en
+newline.
+.TP
+.B \er
+carriage return.
+.TP
+.B \et
+horizontal tab.
+.TP
+.B \ev
+vertical tab.
+.TP
+.BI \ex "\^hex digits"
+The character represented by the string of hexadecimal digits following
+the
+.BR \ex .
+As in \*(AN C, all following hexadecimal digits are considered part of
+the escape sequence.
+(This feature should tell us something about language design by committee.)
+E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
+.BI \e ddd
+The character represented by the 1-, 2-, or 3-digit sequence of octal
+digits. E.g. \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
+.BI \e c
+The literal character
+.IR c\^ .
+.PP
+The escape sequences may also be used inside constant regular expressions
+(e.g.,
+.B "/[\ \et\ef\en\er\ev]/"
+matches whitespace characters).
+.PP
+In compatibility mode, the characters represented by octal and
+hexadecimal escape sequences are treated literally when used in
+regexp constants. Thus,
+.B /a\e52b/
+is equivalent to
+.BR /a\e*b/ .
+.SH FUNCTIONS
+Functions in AWK are defined as follows:
+.PP
+.RS
+\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR
+.RE
+.PP
+Functions are executed when they are called from within expressions
+in either patterns or actions.
Actual parameters supplied in the function +call are used to instantiate the formal parameters declared in the function. +Arrays are passed by reference, other variables are passed by value. +.PP +Since functions were not originally part of the AWK language, the provision +for local variables is rather clumsy: They are declared as extra parameters +in the parameter list. The convention is to separate local variables from +real parameters by extra spaces in the parameter list. For example: +.PP +.RS +.ft B +.nf +function f(p, q, a, b) # a & b are local +{ + \&..... +} + +/abc/ { ... ; f(1, 2) ; ... } +.fi +.ft R +.RE +.PP +The left parenthesis in a function call is required +to immediately follow the function name, +without any intervening white space. +This is to avoid a syntactic ambiguity with the concatenation operator. +This restriction does not apply to the built-in functions listed above. +.PP +Functions may call each other and may be recursive. +Function parameters used as local variables are initialized +to the null string and the number zero upon function invocation. +.PP +If +.B \-\^\-lint +has been provided, +.I gawk +will warn about calls to undefined functions at parse time, +instead of at run time. +Calling an undefined function at run time is a fatal error. +.PP +The word +.B func +may be used in place of +.BR function . +.SH EXAMPLES +.nf +Print and sort the login names of all users: + +.ft B + BEGIN { FS = ":" } + { print $1 | "sort" } + +.ft R +Count lines in a file: + +.ft B + { nlines++ } + END { print nlines } + +.ft R +Precede each line by its number in the file: + +.ft B + { print FNR, $0 } + +.ft R +Concatenate and line number (a variation on a theme): + +.ft B + { print NR, $0 } +.ft R +.fi +.SH SEE ALSO +.IR egrep (1), +.IR getpid (2), +.IR getppid (2), +.IR getpgrp (2), +.IR getuid (2), +.IR geteuid (2), +.IR getgid (2), +.IR getegid (2), +.IR getgroups (2) +.PP +.IR "The AWK Programming Language" , +Alfred V. Aho, Brian W. 
Kernighan, Peter J. Weinberger, +Addison-Wesley, 1988. ISBN 0-201-07981-X. +.PP +.IR "AWK Language Programming" , +Edition 1.0, published by the Free Software Foundation, 1995. +.SH POSIX COMPATIBILITY +A primary goal for +.I gawk +is compatibility with the \*(PX standard, as well as with the +latest version of \*(UX +.IR awk . +To this end, +.I gawk +incorporates the following user visible +features which are not described in the AWK book, +but are part of the Bell Labs version of +.IR awk , +and are in the \*(PX standard. +.PP +The +.B \-v +option for assigning variables before program execution starts is new. +The book indicates that command line variable assignment happens when +.I awk +would otherwise open the argument as a file, which is after the +.B BEGIN +block is executed. However, in earlier implementations, when such an +assignment appeared before any file names, the assignment would happen +.I before +the +.B BEGIN +block was run. Applications came to depend on this ``feature.'' +When +.I awk +was changed to match its documentation, this option was added to +accommodate applications that depended upon the old behavior. +(This feature was agreed upon by both the AT&T and GNU developers.) +.PP +The +.B \-W +option for implementation specific features is from the \*(PX standard. +.PP +When processing arguments, +.I gawk +uses the special option ``\fB\-\^\-\fP'' to signal the end of +arguments. +In compatibility mode, it will warn about, but otherwise ignore, +undefined options. +In normal operation, such arguments are passed on to the AWK program for +it to process. +.PP +The AWK book does not define the return value of +.BR srand() . +The \*(PX standard +has it return the seed it was using, to allow keeping track +of random number sequences. Therefore +.B srand() +in +.I gawk +also returns its current seed. 
+.PP +Other new features are: +The use of multiple +.B \-f +options (from MKS +.IR awk ); +the +.B ENVIRON +array; the +.BR \ea , +and +.BR \ev +escape sequences (done originally in +.I gawk +and fed back into AT&T's); the +.B tolower() +and +.B toupper() +built-in functions (from AT&T); and the \*(AN C conversion specifications in +.B printf +(done first in AT&T's version). +.SH GNU EXTENSIONS +.I Gawk +has a number of extensions to \*(PX +.IR awk . +They are described in this section. All the extensions described here +can be disabled by +invoking +.I gawk +with the +.B \-\^\-traditional +option. +.PP +The following features of +.I gawk +are not available in +\*(PX +.IR awk . +.RS +.TP \w'\(bu'u+1n +\(bu +The +.B \ex +escape sequence. +(Disabled with +.BR \-\^\-posix .) +.TP \w'\(bu'u+1n +\(bu +The +.B fflush() +function. +(Disabled with +.BR \-\^\-posix .) +.TP +\(bu +The +.BR systime(), +.BR strftime(), +and +.B gensub() +functions. +.TP +\(bu +The special file names available for I/O redirection are not recognized. +.TP +\(bu +The +.BR ARGIND , +.BR ERRNO , +and +.B RT +variables are not special. +.TP +\(bu +The +.B IGNORECASE +variable and its side-effects are not available. +.TP +\(bu +The +.B FIELDWIDTHS +variable and fixed-width field splitting. +.TP +\(bu +The use of +.B RS +as a regular expression. +.TP +\(bu +The ability to split out individual characters using the null string +as the value of +.BR FS , +and as the third argument to +.BR split() . +.TP +\(bu +No path search is performed for files named via the +.B \-f +option. Therefore the +.B AWKPATH +environment variable is not special. +.TP +\(bu +The use of +.B "nextfile" +to abandon processing of the current input file. +.TP +\(bu +The use of +.BI delete " array" +to delete the entire contents of an array. +.RE +.PP +The AWK book does not define the return value of the +.B close() +function. 
+.IR Gawk\^ 's +.B close() +returns the value from +.IR fclose (3), +or +.IR pclose (3), +when closing a file or pipe, respectively. +.PP +When +.I gawk +is invoked with the +.B \-\^\-traditional +option, +if the +.I fs +argument to the +.B \-F +option is ``t'', then +.B FS +will be set to the tab character. +Note that typing +.B "gawk \-F\et \&..." +simply causes the shell to quote the ``t,'', and does not pass +``\et'' to the +.B \-F +option. +Since this is a rather ugly special case, it is not the default behavior. +This behavior also does not occur if +.B \-\^\-posix +has been specified. +To really get a tab character as the field separator, it is best to use +quotes: +.BR "gawk \-F'\et' \&..." . +.ig +.PP +If +.I gawk +was compiled for debugging, it will +accept the following additional options: +.TP +.PD 0 +.B \-Wparsedebug +.TP +.PD +.B \-\^\-parsedebug +Turn on +.IR yacc (1) +or +.IR bison (1) +debugging output during program parsing. +This option should only be of interest to the +.I gawk +maintainers, and may not even be compiled into +.IR gawk . +.. +.SH HISTORICAL FEATURES +There are two features of historical AWK implementations that +.I gawk +supports. +First, it is possible to call the +.B length() +built-in function not only with no argument, but even without parentheses! +Thus, +.RS +.PP +.ft B +a = length # Holy Algol 60, Batman! +.ft R +.RE +.PP +is the same as either of +.RS +.PP +.ft B +a = length() +.br +a = length($0) +.ft R +.RE +.PP +This feature is marked as ``deprecated'' in the \*(PX standard, and +.I gawk +will issue a warning about its use if +.B \-\^\-lint +is specified on the command line. +.PP +The other feature is the use of either the +.B continue +or the +.B break +statements outside the body of a +.BR while , +.BR for , +or +.B do +loop. Traditional AWK implementations have treated such usage as +equivalent to the +.B next +statement. +.I Gawk +will support this usage if +.B \-\^\-traditional +has been specified. 
+.SH ENVIRONMENT VARIABLES +If +.B POSIXLY_CORRECT +exists in the environment, then +.I gawk +behaves exactly as if +.B \-\^\-posix +had been specified on the command line. +If +.B \-\^\-lint +has been specified, +.I gawk +will issue a warning message to this effect. +.PP +The +.B AWKPATH +environment variable can be used to provide a list of directories that +.I gawk +will search when looking for files named via the +.B \-f +and +.B \-\^\-file +options. +.SH BUGS +The +.B \-F +option is not necessary given the command line variable assignment feature; +it remains only for backwards compatibility. +.PP +If your system actually has support for +.B /dev/fd +and the associated +.BR /dev/stdin , +.BR /dev/stdout , +and +.B /dev/stderr +files, you may get different output from +.I gawk +than you would get on a system without those files. When +.I gawk +interprets these files internally, it synchronizes output to the standard +output with output to +.BR /dev/stdout , +while on a system with those files, the output is actually to different +open files. +Caveat Emptor. +.PP +Syntactically invalid single character programs tend to overflow +the parse stack, generating a rather unhelpful message. Such programs +are surprisingly difficult to diagnose in the completely general case, +and the effort to do so really is not worth it. +.SH VERSION INFORMATION +This man page documents +.IR gawk , +version 3.0.2. +.SH AUTHORS +The original version of \*(UX +.I awk +was designed and implemented by Alfred Aho, +Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan +continues to maintain and enhance it. +.PP +Paul Rubin and Jay Fenlason, +of the Free Software Foundation, wrote +.IR gawk , +to be compatible with the original version of +.I awk +distributed in Seventh Edition \*(UX. +John Woods contributed a number of bug fixes. +David Trueman, with contributions +from Arnold Robbins, made +.I gawk +compatible with the new version of \*(UX +.IR awk . 
+Arnold Robbins is the current maintainer.
+.PP
+The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
+Scott Deifik is the current DOS maintainer. Pat Rankin did the
+port to VMS, and Michal Jaegermann did the port to the Atari ST.
+The port to OS/2 was done by Kai Uwe Rommel, with contributions and
+help from Darrel Hankerson. Fred Fish supplied support for the Amiga.
+.SH BUG REPORTS
+If you find a bug in
+.IR gawk ,
+please send electronic mail to
+.BR bug-gnu-utils@prep.ai.mit.edu ,
+.I with
+a carbon copy to
+.BR arnold@gnu.ai.mit.edu .
+Please include your operating system and its revision, the version of
+.IR gawk ,
+what C compiler you used to compile it, and a test program
+and data that are as small as possible for reproducing the problem.
+.PP
+Before sending a bug report, please do two things. First, verify that
+you have the latest version of
+.IR gawk .
+Many bugs (usually subtle ones) are fixed at each release, and if
+yours is out of date, the problem may already have been solved.
+Second, please read this man page and the reference manual carefully to
+be sure that what you think is a bug really is, instead of just a quirk
+in the language.
+.PP
+Whatever you do, do
+.B NOT
+post a bug report in
+.BR comp.lang.awk .
+While the
+.I gawk
+developers occasionally read this newsgroup, posting bug reports there
+is an unreliable way to report bugs. Instead, please use the electronic mail
+addresses given above.
+.SH ACKNOWLEDGEMENTS
+Brian Kernighan of Bell Labs
+provided valuable assistance during testing and debugging.
+We thank him.
+.SH COPYING PERMISSIONS
+Copyright \(co 1996 Free Software Foundation, Inc.
+.PP
+Permission is granted to make and distribute verbatim copies of
+this manual page provided the copyright notice and this permission
+notice are preserved on all copies.
+.ig +Permission is granted to process this file through troff and print the +results, provided the printed document carries copying permission +notice identical to this one except for the removal of this paragraph +(this paragraph not being relevant to the printed manual page). +.. +.PP +Permission is granted to copy and distribute modified versions of this +manual page under the conditions for verbatim copying, provided that +the entire resulting derived work is distributed under the terms of a +permission notice identical to this one. +.PP +Permission is granted to copy and distribute translations of this +manual page into another language, under the above conditions for +modified versions, except that this permission notice may be stated in +a translation approved by the Foundation. diff --git a/contrib/awk/doc/gawk.texi b/contrib/awk/doc/gawk.texi new file mode 100644 index 0000000..8c2aad2 --- /dev/null +++ b/contrib/awk/doc/gawk.texi @@ -0,0 +1,20820 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header (This is for running Texinfo on a region.) +@setfilename gawk.info +@settitle The GNU Awk User's Guide +@c %**end of header (This is for running Texinfo on a region.) + +@c inside ifinfo for older versions of texinfo.tex +@ifinfo +@c I hope this is the right category +@dircategory Programming Languages +@direntry +* Gawk: (gawk.info). A Text Scanning and Processing Language. +@end direntry +@end ifinfo + +@c @set xref-automatic-section-title +@c @set DRAFT + +@c The following information should be updated here only! +@c This sets the edition of the document, the version of gawk it +@c applies to, and when the document was updated. +@set TITLE Effective AWK Programming +@set SUBTITLE A User's Guide for GNU Awk +@set PATCHLEVEL 3 +@set EDITION 1.0.@value{PATCHLEVEL} +@set VERSION 3.0 +@set UPDATE-MONTH February 1997 +@iftex +@set DOCUMENT book +@end iftex +@ifinfo +@set DOCUMENT Info file +@end ifinfo + +@ignore +Some comments on the layout for TeX. +1. 
Use at least texinfo.tex 2.159. It contains fixes that + are needed to get the footings for draft mode to not appear. +2. I have done A LOT of work to make this look good. There are `@page' commands + and use of `@group ... @end group' in a number of places. If you muck + with anything, it's your responsibility not to break the layout. +@end ignore + +@c merge the function and variable indexes into the concept index +@ifinfo +@synindex fn cp +@synindex vr cp +@end ifinfo +@iftex +@syncodeindex fn cp +@syncodeindex vr cp +@end iftex + +@c If "finalout" is commented out, the printed output will show +@c black boxes that mark lines that are too long. Thus, it is +@c unwise to comment it out when running a master in case there are +@c overfulls which are deemed okay. + +@ifclear DRAFT +@iftex +@finalout +@end iftex +@end ifclear + +@smallbook +@iftex +@c @cropmarks +@end iftex + +@ifinfo +This file documents @code{awk}, a program that you can use to select +particular records in a file and perform operations upon them. + +This is Edition @value{EDITION} of @cite{@value{TITLE}}, +for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation of AWK. + +Copyright (C) 1989, 1991, 92, 93, 96, 97 Free Software Foundation, Inc. + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +@ignore +Permission is granted to process this file through TeX and print the +results, provided the printed document carries copying permission +notice identical to this one except for the removal of this paragraph +(this paragraph not being relevant to the printed manual). + +@end ignore +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the entire +resulting derived work is distributed under the terms of a permission +notice identical to this one. 
+ +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that this permission notice may be stated in a translation approved +by the Foundation. +@end ifinfo + +@setchapternewpage odd + +@titlepage +@title @value{TITLE} +@subtitle @value{SUBTITLE} +@subtitle Edition @value{EDITION} +@subtitle @value{UPDATE-MONTH} +@author Arnold D. Robbins +@ignore +@sp 1 +@author Based on @cite{The GAWK Manual}, +@author by Robbins, Close, Rubin, and Stallman +@end ignore + +@c Include the Distribution inside the titlepage environment so +@c that headings are turned off. Headings on and off do not work. + +@page +@vskip 0pt plus 1filll +@ifset LEGALJUNK +The programs and applications presented in this book have been +included for their instructional value. They have been tested with care, +but are not guaranteed for any particular purpose. The publisher does not +offer any warranties or representations, nor does it accept any +liabilities with respect to the programs or applications. +So there. +@sp 2 +UNIX is a registered trademark of X/Open, Ltd. @* +Microsoft, MS, and MS-DOS are registered trademarks, and Windows is a +trademark of Microsoft Corporation in the United States and other +countries. @* +Atari, 520ST, 1040ST, TT, STE, Mega, and Falcon are registered trademarks +or trademarks of Atari Corporation. @* +DEC, Digital, OpenVMS, ULTRIX, and VMS, are trademarks of Digital Equipment +Corporation. @* +@end ifset +``To boldly go where no man has gone before'' is a +Registered Trademark of Paramount Pictures Corporation. @* +@c sorry, i couldn't resist +@sp 3 +Copyright @copyright{} 1989, 1991, 92, 93, 96, 97 Free Software Foundation, Inc. +@sp 2 + +This is Edition @value{EDITION} of @cite{@value{TITLE}}, @* +for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU implementation of AWK. 
+ +@sp 2 +@center Published jointly by: + +@multitable {Specialized Systems Consultants, Inc. (SSC)} {Boston, MA 02111-1307 USA} +@item Specialized Systems Consultants, Inc. (SSC) @tab Free Software Foundation +@item PO Box 55549 @tab 59 Temple Place --- Suite 330 +@item Seattle, WA 98155 USA @tab Boston, MA 02111-1307 USA +@item Phone: +1-206-782-7733 @tab Phone: +1-617-542-5942 +@item Fax: +1-206-782-7191 @tab Fax: +1-617-542-2652 +@item E-mail: @code{sales@@ssc.com} @tab E-mail: @code{gnu@@prep.ai.mit.edu} +@item URL: @code{http://www.ssc.com/} @tab URL: @code{http://www.fsf.org/} +@end multitable + +@sp 1 +@c this ISBN can change! Check with SSC +@c This one is correct for gawk 3.0 and edition 1.0 from the FSF +@c ISBN 1-882114-26-4 @* +@c This one is correct for gawk 3.0.3 and edition 1.0.3 from SSC +ISBN 1-57831-000-8 @* + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the entire +resulting derived work is distributed under the terms of a permission +notice identical to this one. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that this permission notice may be stated in a translation approved +by the Foundation. +@sp 2 +@c Cover art by Etienne Suvasa. +Cover art by Amy Wells Wood. +@end titlepage + +@c Thanks to Bob Chassell for directions on doing dedications. 
+@iftex +@headings off +@page +@w{ } +@sp 9 +@center @i{To Miriam, for making me complete.} +@sp 1 +@center @i{To Chana, for the joy you bring us.} +@sp 1 +@center @i{To Rivka, for the exponential increase.} +@sp 1 +@center @i{To Nachum, for the added dimension.} +@page +@w{ } +@page +@headings on +@end iftex + +@iftex +@headings off +@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @| +@oddheading @| @| @strong{@thischapter}@ @ @ @thispage +@ifset DRAFT +@evenfooting @today{} @| @emph{DRAFT!} @| Please Do Not Redistribute +@oddfooting Please Do Not Redistribute @| @emph{DRAFT!} @| @today{} +@end ifset +@end iftex + +@ifinfo +@node Top, Preface, (dir), (dir) +@top General Introduction +@c Preface or Licensing nodes should come right after the Top +@c node, in `unnumbered' sections, then the chapter, `What is gawk'. + +This file documents @code{awk}, a program that you can use to select +particular records in a file and perform operations upon them. + +This is Edition @value{EDITION} of @cite{@value{TITLE}}, @* +for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation @* +of AWK. + +@end ifinfo + +@menu +* Preface:: What this @value{DOCUMENT} is about; brief + history and acknowledgements. +* What Is Awk:: What is the @code{awk} language; using this + @value{DOCUMENT}. +* Getting Started:: A basic introduction to using @code{awk}. How + to run an @code{awk} program. Command line + syntax. +* One-liners:: Short, sample @code{awk} programs. +* Regexp:: All about matching things using regular + expressions. +* Reading Files:: How to read files and manipulate fields. +* Printing:: How to print using @code{awk}. Describes the + @code{print} and @code{printf} statements. + Also describes redirection of output. +* Expressions:: Expressions are the basic building blocks of + statements. +* Patterns and Actions:: Overviews of patterns and actions. +* Statements:: The various control statements are described + in detail. 
+* Built-in Variables:: Built-in Variables +* Arrays:: The description and use of arrays. Also + includes array-oriented control statements. +* Built-in:: The built-in functions are summarized here. +* User-defined:: User-defined functions are described in + detail. +* Invoking Gawk:: How to run @code{gawk}. +* Library Functions:: A Library of @code{awk} Functions. +* Sample Programs:: Many @code{awk} programs with complete + explanations. +* Language History:: The evolution of the @code{awk} language. +* Gawk Summary:: @code{gawk} Options and Language Summary. +* Installation:: Installing @code{gawk} under various operating + systems. +* Notes:: Something about the implementation of + @code{gawk}. +* Glossary:: An explanation of some unfamiliar terms. +* Copying:: Your right to copy and distribute @code{gawk}. +* Index:: Concept and Variable Index. + +* History:: The history of @code{gawk} and @code{awk}. +* Manual History:: Brief history of the GNU project and this + @value{DOCUMENT}. +* Acknowledgements:: Acknowledgements. +* This Manual:: Using this @value{DOCUMENT}. Includes sample + input files that you can use. +* Conventions:: Typographical Conventions. +* Sample Data Files:: Sample data files for use in the @code{awk} + programs illustrated in this @value{DOCUMENT}. +* Names:: What name to use to find @code{awk}. +* Running gawk:: How to run @code{gawk} programs; includes + command line syntax. +* One-shot:: Running a short throw-away @code{awk} program. +* Read Terminal:: Using no input files (input from terminal + instead). +* Long:: Putting permanent @code{awk} programs in + files. +* Executable Scripts:: Making self-contained @code{awk} programs. +* Comments:: Adding documentation to @code{gawk} programs. +* Very Simple:: A very simple example. +* Two Rules:: A less simple one-line example with two rules. +* More Complex:: A more complex example. +* Statements/Lines:: Subdividing or combining statements into + lines. 
+* Other Features:: Other Features of @code{awk}. +* When:: When to use @code{gawk} and when to use other + things. +* Regexp Usage:: How to Use Regular Expressions. +* Escape Sequences:: How to write non-printing characters. +* Regexp Operators:: Regular Expression Operators. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. +* Leftmost Longest:: How much text matches. +* Computed Regexps:: Using Dynamic Regexps. +* Records:: Controlling how data is split into records. +* Fields:: An introduction to fields. +* Non-Constant Fields:: Non-constant Field Numbers. +* Changing Fields:: Changing the Contents of a Field. +* Field Separators:: The field separator and how to change it. +* Basic Field Splitting:: How fields are split with single characters or + simple strings. +* Regexp Field Splitting:: Using regexps as the field separator. +* Single Character Fields:: Making each character a separate field. +* Command Line Field Separator:: Setting @code{FS} from the command line. +* Field Splitting Summary:: Some final points and a summary table. +* Constant Size:: Reading constant width data. +* Multiple Line:: Reading multi-line records. +* Getline:: Reading files under explicit program control + using the @code{getline} function. +* Getline Intro:: Introduction to the @code{getline} function. +* Plain Getline:: Using @code{getline} with no arguments. +* Getline/Variable:: Using @code{getline} into a variable. +* Getline/File:: Using @code{getline} from a file. +* Getline/Variable/File:: Using @code{getline} into a variable from a + file. +* Getline/Pipe:: Using @code{getline} from a pipe. +* Getline/Variable/Pipe:: Using @code{getline} into a variable from a + pipe. +* Getline Summary:: Summary Of @code{getline} Variants. +* Print:: The @code{print} statement. +* Print Examples:: Simple examples of @code{print} statements. +* Output Separators:: The output separators and how to change them. 
+* OFMT:: Controlling Numeric Output With @code{print}. +* Printf:: The @code{printf} statement. +* Basic Printf:: Syntax of the @code{printf} statement. +* Control Letters:: Format-control letters. +* Format Modifiers:: Format-specification modifiers. +* Printf Examples:: Several examples. +* Redirection:: How to redirect output to multiple files and + pipes. +* Special Files:: File name interpretation in @code{gawk}. + @code{gawk} allows access to inherited file + descriptors. +* Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Constants:: String, numeric, and regexp constants. +* Scalar Constants:: Numeric and string constants. +* Regexp Constants:: Regular Expression constants. +* Using Constant Regexps:: When and how to use a regexp constant. +* Variables:: Variables give names to values for later use. +* Using Variables:: Using variables in your programs. +* Assignment Options:: Setting variables on the command line and a + summary of command line syntax. This is an + advanced method of input. +* Conversion:: The conversion of strings to numbers and vice + versa. +* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, + etc.) +* Concatenation:: Concatenating strings. +* Assignment Ops:: Changing the value of a variable or a field. +* Increment Ops:: Incrementing the numeric value of a variable. +* Truth Values:: What is ``true'' and what is ``false''. +* Typing and Comparison:: How variables acquire types, and how this + affects comparison of numbers and strings with + @samp{<}, etc. +* Boolean Ops:: Combining comparison expressions using boolean + operators @samp{||} (``or''), @samp{&&} + (``and'') and @samp{!} (``not''). +* Conditional Exp:: Conditional expressions select between two + subexpressions under control of a third + subexpression. +* Function Calls:: A function call is an expression. +* Precedence:: How various operators nest. +* Pattern Overview:: What goes into a pattern. 
+* Kinds of Patterns:: A list of all kinds of patterns. +* Regexp Patterns:: Using regexps as patterns. +* Expression Patterns:: Any expression can be used as a pattern. +* Ranges:: Pairs of patterns specify record ranges. +* BEGIN/END:: Specifying initialization and cleanup rules. +* Using BEGIN/END:: How and why to use BEGIN/END rules. +* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. +* Empty:: The empty pattern, which matches every record. +* Action Overview:: What goes into an action. +* If Statement:: Conditionally execute some @code{awk} + statements. +* While Statement:: Loop until some condition is satisfied. +* Do Statement:: Do specified action while looping until some + condition is satisfied. +* For Statement:: Another looping statement, that provides + initialization and increment clauses. +* Break Statement:: Immediately exit the innermost enclosing loop. +* Continue Statement:: Skip to the end of the innermost enclosing + loop. +* Next Statement:: Stop processing the current input record. +* Nextfile Statement:: Stop processing the current file. +* Exit Statement:: Stop execution of @code{awk}. +* User-modified:: Built-in variables that you change to control + @code{awk}. +* Auto-set:: Built-in variables where @code{awk} gives you + information. +* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}. +* Array Intro:: Introduction to Arrays +* Reference to Elements:: How to examine one element of an array. +* Assigning Elements:: How to change an element of an array. +* Array Example:: Basic Example of an Array +* Scanning an Array:: A variation of the @code{for} statement. It + loops through the indices of an array's + existing elements. +* Delete:: The @code{delete} statement removes an element + from an array. +* Numeric Array Subscripts:: How to use numbers as subscripts in + @code{awk}. +* Uninitialized Subscripts:: Using Uninitialized variables as subscripts. +* Multi-dimensional:: Emulating multi-dimensional arrays in + @code{awk}. 
+* Multi-scanning:: Scanning multi-dimensional arrays. +* Calling Built-in:: How to call built-in functions. +* Numeric Functions:: Functions that work with numbers, including + @code{int}, @code{sin} and @code{rand}. +* String Functions:: Functions for string manipulation, such as + @code{split}, @code{match}, and + @code{sprintf}. +* I/O Functions:: Functions for files and shell commands. +* Time Functions:: Functions for dealing with time stamps. +* Definition Syntax:: How to write definitions and what they mean. +* Function Example:: An example function definition and what it + does. +* Function Caveats:: Things to watch out for. +* Return Statement:: Specifying the value a function returns. +* Options:: Command line options and their meanings. +* Other Arguments:: Input file names and variable assignments. +* AWKPATH Variable:: Searching directories for @code{awk} programs. +* Obsolete:: Obsolete Options and/or features. +* Undocumented:: Undocumented Options and Features. +* Known Bugs:: Known Bugs in @code{gawk}. +* Portability Notes:: What to do if you don't have @code{gawk}. +* Nextfile Function:: Two implementations of a @code{nextfile} + function. +* Assert Function:: A function for assertions in @code{awk} + programs. +* Round Function:: A function for rounding if @code{sprintf} does + not do it correctly. +* Ordinal Functions:: Functions for using characters as numbers and + vice versa. +* Join Function:: A function to join an array into a string. +* Mktime Function:: A function to turn a date into a timestamp. +* Gettimeofday Function:: A function to get formatted times. +* Filetrans Function:: A function for handling data file transitions. +* Getopt Function:: A function for processing command line + arguments. +* Passwd Functions:: Functions for getting user information. +* Group Functions:: Functions for getting group information. +* Library Names:: How to best name private global variables in + library functions. 
+* Clones:: Clones of common utilities. +* Cut Program:: The @code{cut} utility. +* Egrep Program:: The @code{egrep} utility. +* Id Program:: The @code{id} utility. +* Split Program:: The @code{split} utility. +* Tee Program:: The @code{tee} utility. +* Uniq Program:: The @code{uniq} utility. +* Wc Program:: The @code{wc} utility. +* Miscellaneous Programs:: Some interesting @code{awk} programs. +* Dupword Program:: Finding duplicated words in a document. +* Alarm Program:: An alarm clock. +* Translate Program:: A program similar to the @code{tr} utility. +* Labels Program:: Printing mailing labels. +* Word Sorting:: A program to produce a word usage count. +* History Sorting:: Eliminating duplicate entries from a history + file. +* Extract Program:: Pulling out programs from Texinfo source + files. +* Simple Sed:: A Simple Stream Editor. +* Igawk Program:: A wrapper for @code{awk} that includes files. +* V7/SVR3.1:: The major changes between V7 and System V + Release 3.1. +* SVR4:: Minor changes between System V Releases 3.1 + and 4. +* POSIX:: New features from the POSIX standard. +* BTL:: New features from the Bell Laboratories + version of @code{awk}. +* POSIX/GNU:: The extensions in @code{gawk} not in POSIX + @code{awk}. +* Command Line Summary:: Recapitulation of the command line. +* Language Summary:: A terse review of the language. +* Variables/Fields:: Variables, fields, and arrays. +* Fields Summary:: Input field splitting. +* Built-in Summary:: @code{awk}'s built-in variables. +* Arrays Summary:: Using arrays. +* Data Type Summary:: Values in @code{awk} are numbers or strings. +* Rules Summary:: Patterns and Actions, and their component + parts. +* Pattern Summary:: Quick overview of patterns. +* Regexp Summary:: Quick overview of regular expressions. +* Actions Summary:: Quick overview of actions. +* Operator Summary:: @code{awk} operators. +* Control Flow Summary:: The control statements. +* I/O Summary:: The I/O statements. 
+* Printf Summary:: A summary of @code{printf}. +* Special File Summary:: Special file names interpreted internally. +* Built-in Functions Summary:: Built-in numeric and string functions. +* Time Functions Summary:: Built-in time functions. +* String Constants Summary:: Escape sequences in strings. +* Functions Summary:: Defining and calling functions. +* Historical Features:: Some undocumented but supported ``features''. +* Gawk Distribution:: What is in the @code{gawk} distribution. +* Getting:: How to get the distribution. +* Extracting:: How to extract the distribution. +* Distribution contents:: What is in the distribution. +* Unix Installation:: Installing @code{gawk} under various versions + of Unix. +* Quick Installation:: Compiling @code{gawk} under Unix. +* Configuration Philosophy:: How it's all supposed to work. +* VMS Installation:: Installing @code{gawk} on VMS. +* VMS Compilation:: How to compile @code{gawk} under VMS. +* VMS Installation Details:: How to install @code{gawk} under VMS. +* VMS Running:: How to run @code{gawk} under VMS. +* VMS POSIX:: Alternate instructions for VMS POSIX. +* PC Installation:: Installing and Compiling @code{gawk} on MS-DOS + and OS/2 +* Atari Installation:: Installing @code{gawk} on the Atari ST. +* Atari Compiling:: Compiling @code{gawk} on Atari +* Atari Using:: Running @code{gawk} on Atari +* Amiga Installation:: Installing @code{gawk} on an Amiga. +* Bugs:: Reporting Problems and Bugs. +* Other Versions:: Other freely available @code{awk} + implementations. +* Compatibility Mode:: How to disable certain @code{gawk} extensions. +* Additions:: Making Additions To @code{gawk}. +* Adding Code:: Adding code to the main body of @code{gawk}. +* New Ports:: Porting @code{gawk} to a new operating system. +* Future Extensions:: New features that may be implemented one day. +* Improvements:: Suggestions for improvements by volunteers. 
+ +@end menu + +@c dedication for Info file +@ifinfo +@center To Miriam, for making me complete. +@sp 1 +@center To Chana, for the joy you bring us. +@sp 1 +@center To Rivka, for the exponential increase. +@sp 1 +@center To Nachum, for the added dimension. +@end ifinfo + +@node Preface, What Is Awk, Top, Top +@unnumbered Preface + +@c I saw a comment somewhere that the preface should describe the book itself, +@c and the introduction should describe what the book covers. + +This @value{DOCUMENT} teaches you about the @code{awk} language and +how you can use it effectively. You should already be familiar with basic +system commands, such as @code{cat} and @code{ls},@footnote{These commands +are available on POSIX compliant systems, as well as on traditional Unix +based systems. If you are using some other operating system, you still need to +be familiar with the ideas of I/O redirection and pipes.} and basic shell +facilities, such as Input/Output (I/O) redirection and pipes. + +Implementations of the @code{awk} language are available for many different +computing environments. This @value{DOCUMENT}, while describing the @code{awk} language +in general, also describes a particular implementation of @code{awk} called +@code{gawk} (which stands for ``GNU Awk''). @code{gawk} runs on a broad range +of Unix systems, ranging from 80386 PC-based computers, up through large scale +systems, such as Crays. @code{gawk} has also been ported to MS-DOS and +OS/2 PC's, Atari and Amiga micro-computers, and VMS. + +@menu +* History:: The history of @code{gawk} and @code{awk}. +* Manual History:: Brief history of the GNU project and this + @value{DOCUMENT}. +* Acknowledgements:: Acknowledgements. 
+@end menu + +@node History, Manual History, Preface, Preface +@unnumberedsec History of @code{awk} and @code{gawk} + +@cindex acronym +@cindex history of @code{awk} +@cindex Aho, Alfred +@cindex Weinberger, Peter +@cindex Kernighan, Brian +@cindex old @code{awk} +@cindex new @code{awk} +The name @code{awk} comes from the initials of its designers: Alfred V.@: +Aho, Peter J.@: Weinberger, and Brian W.@: Kernighan. The original version of +@code{awk} was written in 1977 at AT&T Bell Laboratories. +In 1985 a new version made the programming +language more powerful, introducing user-defined functions, multiple input +streams, and computed regular expressions. +This new version became generally available with Unix System V Release 3.1. +The version in System V Release 4 added some new features and also cleaned +up the behavior in some of the ``dark corners'' of the language. +The specification for @code{awk} in the POSIX Command Language +and Utilities standard further clarified the language based on feedback +from both the @code{gawk} designers, and the original Bell Labs @code{awk} +designers. + +The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin +and Jay Fenlason, with advice from Richard Stallman. John Woods +contributed parts of the code as well. In 1988 and 1989, David Trueman, with +help from Arnold Robbins, thoroughly reworked @code{gawk} for compatibility +with the newer @code{awk}. Current development focuses on bug fixes, +performance improvements, standards compliance, and occasionally, new features. + +@node Manual History, Acknowledgements, History, Preface +@unnumberedsec The GNU Project and This Book + +@cindex Free Software Foundation +@cindex Stallman, Richard +The Free Software Foundation (FSF) is a non-profit organization dedicated +to the production and distribution of freely distributable software. +It was founded by Richard M.@: Stallman, the author of the original +Emacs editor. 
GNU Emacs is the most widely used version of Emacs today. + +@cindex GNU Project +The GNU project is an on-going effort on the part of the Free Software +Foundation to create a complete, freely distributable, POSIX compliant +computing environment. (GNU stands for ``GNU's not Unix''.) +The FSF uses the ``GNU General Public License'' (or GPL) to ensure that +source code for their software is always available to the end user. A +copy of the GPL is included for your reference +(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}). +The GPL applies to the C language source code for @code{gawk}. + +A shell, an editor (Emacs), highly portable optimizing C, C++, and +Objective-C compilers, a symbolic debugger, and dozens of large and +small utilities (such as @code{gawk}), have all been completed and are +freely available. As of this writing (early 1997), the GNU operating +system kernel (the HURD), has been released, but is still in an early +stage of development. + +@cindex Linux +@cindex NetBSD +@cindex FreeBSD +Until the GNU operating system is more fully developed, you should +consider using Linux, a freely distributable, Unix-like operating +system for 80386, DEC Alpha, Sun SPARC and other systems. There are +many books on Linux. One freely available one is @cite{Linux +Installation and Getting Started}, by Matt Welsh. +Many Linux distributions are available, often in computer stores or +bundled on CD-ROM with books about Linux. +(There are three other freely available, Unix-like operating systems for +80386 and other systems, NetBSD, FreeBSD, and OpenBSD. All are based on the +4.4-Lite Berkeley Software Distribution, and they use recent versions +of @code{gawk} for their versions of @code{awk}.) + +@iftex +This @value{DOCUMENT} you are reading now is actually free. 
The +information in it is freely available to anyone, the machine readable +source code for the @value{DOCUMENT} comes with @code{gawk}, and anyone +may take this @value{DOCUMENT} to a copying machine and make as many +copies of it as they like. (Take a moment to check the copying +permissions on the Copyright page.) + +If you paid money for this @value{DOCUMENT}, what you actually paid for +was the @value{DOCUMENT}'s nice printing and binding, and the +publisher's associated costs to produce it. We have made an effort to +keep these costs reasonable; most people would prefer a bound book to +over 330 pages of photo-copied text that would then have to be held in +a loose-leaf binder (not to mention the time and labor involved in +doing the copying). The same is true of producing this +@value{DOCUMENT} from the machine readable source; the retail price is +only slightly more than the cost per page of printing it +on a laser printer. +@end iftex + +This @value{DOCUMENT} itself has gone through several previous, +preliminary editions. I started working on a preliminary draft of +@cite{The GAWK Manual}, by Diane Close, Paul Rubin, and Richard +Stallman in the fall of 1988. +It was around 90 pages long, and barely described the original, ``old'' +version of @code{awk}. After substantial revision, the first version of +@cite{The GAWK Manual} to be released was Edition 0.11 Beta in +October of 1989. The manual then underwent more substantial revision +for Edition 0.13 of December 1991. +David Trueman, Pat Rankin, and Michal Jaegermann contributed sections +of the manual for Edition 0.13. +That edition was published by the +FSF as a bound book early in 1992. Since then there have been several +minor revisions, notably Edition 0.14 of November 1992 that was published +by the FSF in January of 1993, and Edition 0.16 of August 1993. + +Edition 1.0 of @cite{@value{TITLE}} represents a significant re-working +of @cite{The GAWK Manual}, with much additional material. 
+The FSF and I agree that I am now the primary author. +I also felt that it needed a more descriptive title. + +@cite{@value{TITLE}} will undoubtedly continue to evolve. +An electronic version +comes with the @code{gawk} distribution from the FSF. +If you find an error in this @value{DOCUMENT}, please report it! +@xref{Bugs, ,Reporting Problems and Bugs}, for information on submitting +problem reports electronically, or write to me in care of the FSF. + +@node Acknowledgements, , Manual History, Preface +@unnumberedsec Acknowledgements + +@cindex Stallman, Richard +I would like to acknowledge Richard M.@: Stallman, for his vision of a +better world, and for his courage in founding the FSF and starting the +GNU project. + +The initial draft of @cite{The GAWK Manual} had the following acknowledgements: + +@quotation +Many people need to be thanked for their assistance in producing this +manual. Jay Fenlason contributed many ideas and sample programs. Richard +Mlynarik and Robert Chassell gave helpful comments on drafts of this +manual. The paper @cite{A Supplemental Document for @code{awk}} by John W.@: +Pierce of the Chemistry Department at UC San Diego, pinpointed several +issues relevant both to @code{awk} implementation and to this manual, that +would otherwise have escaped us. +@end quotation + +The following people provided many helpful comments on Edition 0.13 of +@cite{The GAWK Manual}: Rick Adams, Michael Brennan, Rich Burridge, Diane Close, +Christopher (``Topher'') Eliot, Michael Lijewski, Pat Rankin, Miriam Robbins, +and Michal Jaegermann. + +The following people provided many helpful comments for Edition 1.0 of +@cite{@value{TITLE}}: Karl Berry, Michael Brennan, Darrel +Hankerson, Michal Jaegermann, Michael Lijewski, and Miriam Robbins. +Pat Rankin, Michal Jaegermann, Darrel Hankerson and Scott Deifik +updated their respective sections for Edition 1.0. + +Robert J.@: Chassell provided much valuable advice on +the use of Texinfo. 
He also deserves special thanks for +convincing me @emph{not} to title this @value{DOCUMENT} +@cite{How To Gawk Politely}. +Karl Berry helped significantly with the @TeX{} part of Texinfo. + +@cindex Trueman, David +David Trueman deserves special credit; he has done a yeoman job +of evolving @code{gawk} so that it performs well, and without bugs. +Although he is no longer involved with @code{gawk}, +working with him on this project was a significant pleasure. + +@cindex Deifik, Scott +@cindex Hankerson, Darrel +@cindex Rommel, Kai Uwe +@cindex Rankin, Pat +@cindex Jaegermann, Michal +Scott Deifik, Darrel Hankerson, Kai Uwe Rommel, Pat Rankin, and Michal +Jaegermann (in no particular order) are long time members of the +@code{gawk} ``crack portability team.'' Without their hard work and +help, @code{gawk} would not be nearly the fine program it is today. It +has been and continues to be a pleasure working with this team of fine +people. + +@cindex Friedl, Jeffrey +Jeffrey Friedl provided invaluable help in tracking down a number +of last minute problems with regular expressions in @code{gawk} 3.0. + +@cindex Kernighan, Brian +David and I would like to thank Brian Kernighan of Bell Labs for +invaluable assistance during the testing and debugging of @code{gawk}, and for +help in clarifying numerous points about the language. We could not have +done nearly as good a job on either @code{gawk} or its documentation without +his help. + +@cindex Hughes, Phil +I would like to thank Marshall and Elaine Hartholz of Seattle, and Dr.@: +Bert and Rita Schreiber of Detroit for large amounts of quiet vacation +time in their homes, which allowed me to make significant progress on +this @value{DOCUMENT} and on @code{gawk} itself. Phil Hughes of SSC +contributed in a very important way by loaning me his laptop Linux +system, not once, but twice, allowing me to do a lot of work while +away from home. 
+ +@cindex Robbins, Miriam +Finally, I must thank my wonderful wife, Miriam, for her patience through +the many versions of this project, for her proof-reading, +and for sharing me with the computer. +I would like to thank my parents for their love, and for the grace with +which they raised and educated me. +I also must acknowledge my gratitude to G-d, for the many opportunities +He has sent my way, as well as for the gifts He has given me with which to +take advantage of those opportunities. +@sp 2 +@noindent +Arnold Robbins @* +Atlanta, Georgia @* +February, 1997 + +@ignore +Stuff still not covered anywhere: +BASICS: + Integer vs. floating point + Hex vs. octal vs. decimal + Interpreter vs compiler + input/output +@end ignore + +@node What Is Awk, Getting Started, Preface, Top +@chapter Introduction + +If you are like many computer users, you would frequently like to make +changes in various text files wherever certain patterns appear, or +extract data from parts of certain lines while discarding the rest. To +write a program to do this in a language such as C or Pascal is a +time-consuming inconvenience that may take many lines of code. The job +may be easier with @code{awk}. + +The @code{awk} utility interprets a special-purpose programming language +that makes it possible to handle simple data-reformatting jobs +with just a few lines of code. + +The GNU implementation of @code{awk} is called @code{gawk}; it is fully +upward compatible with the System V Release 4 version of +@code{awk}. @code{gawk} is also upward compatible with the POSIX +specification of the @code{awk} language. This means that all +properly written @code{awk} programs should work with @code{gawk}. +Thus, we usually don't distinguish between @code{gawk} and other @code{awk} +implementations. 
+ +@cindex uses of @code{awk} +Using @code{awk} you can: + +@itemize @bullet +@item +manage small, personal databases + +@item +generate reports + +@item +validate data + +@item +produce indexes, and perform other document preparation tasks + +@item +even experiment with algorithms that can be adapted later to other computer +languages +@end itemize + +@menu +* This Manual:: Using this @value{DOCUMENT}. Includes sample + input files that you can use. +* Conventions:: Typographical Conventions. +* Sample Data Files:: Sample data files for use in the @code{awk} + programs illustrated in this @value{DOCUMENT}. +@end menu + +@node This Manual, Conventions, What Is Awk, What Is Awk +@section Using This Book +@cindex book, using this +@cindex using this book +@cindex language, @code{awk} +@cindex program, @code{awk} +@ignore +@cindex @code{awk} language +@cindex @code{awk} program +@end ignore + +The term @code{awk} refers to a particular program, and to the language you +use to tell this program what to do. When we need to be careful, we call +the program ``the @code{awk} utility'' and the language ``the @code{awk} +language.'' The term @code{gawk} refers to a version of @code{awk} developed +as part of the GNU project. The purpose of this @value{DOCUMENT} is to explain +both the @code{awk} language and how to run the @code{awk} utility. + +The main purpose of the @value{DOCUMENT} is to explain the features +of @code{awk}, as defined in the POSIX standard. It does so in the context +of one particular implementation, @code{gawk}. While doing so, it will also +attempt to describe important differences between @code{gawk} and other +@code{awk} implementations. Finally, any @code{gawk} features that +are not in the POSIX standard for @code{awk} will be noted. + +@iftex +This @value{DOCUMENT} has the difficult task of being both tutorial and reference. +If you are a novice, feel free to skip over details that seem too complex. 
+You should also ignore the many cross references; they are for the +expert user, and for the on-line Info version of the document. +@end iftex + +The term @dfn{@code{awk} program} refers to a program written by you in +the @code{awk} programming language. + +@xref{Getting Started, ,Getting Started with @code{awk}}, for the bare +essentials you need to know to start using @code{awk}. + +Some useful ``one-liners'' are included to give you a feel for the +@code{awk} language (@pxref{One-liners, ,Useful One Line Programs}). + +Many sample @code{awk} programs have been provided for you +(@pxref{Library Functions, ,A Library of @code{awk} Functions}; also +@pxref{Sample Programs, ,Practical @code{awk} Programs}). + +The entire @code{awk} language is summarized for quick reference in +@ref{Gawk Summary, ,@code{gawk} Summary}. Look there if you just need +to refresh your memory about a particular feature. + +If you find terms that you aren't familiar with, try looking them +up in the glossary (@pxref{Glossary}). + +Most of the time complete @code{awk} programs are used as examples, but in +some of the more advanced sections, only the part of the @code{awk} program +that illustrates the concept being described is shown. + +While this @value{DOCUMENT} is aimed principally at people who have not been +exposed +to @code{awk}, there is a lot of information here that even the @code{awk} +expert should find useful. In particular, the description of POSIX +@code{awk}, and the example programs in +@ref{Library Functions, ,A Library of @code{awk} Functions}, and +@ref{Sample Programs, ,Practical @code{awk} Programs}, +should be of interest. + +@c fakenode --- for prepinfo +@unnumberedsubsec Dark Corners +@display +@i{Who opened that window shade?!?} +Count Dracula +@end display +@sp 1 + +@cindex d.c., see ``dark corner'' +@cindex dark corner +Until the POSIX standard (and @cite{The Gawk Manual}), +many features of @code{awk} were either poorly documented, or not +documented at all. 
Descriptions of such features +(often called ``dark corners'') are noted in this @value{DOCUMENT} with +``(d.c.)''. +They also appear in the index under the heading ``dark corner.'' + +@node Conventions, Sample Data Files, This Manual, What Is Awk +@section Typographical Conventions + +This @value{DOCUMENT} is written using Texinfo, the GNU documentation formatting language. +A single Texinfo source file is used to produce both the printed and on-line +versions of the documentation. +@iftex +Because of this, the typographical conventions +are slightly different than in other books you may have read. +@end iftex +@ifinfo +This section briefly documents the typographical conventions used in Texinfo. +@end ifinfo + +Examples you would type at the command line are preceded by the common +shell primary and secondary prompts, @samp{$} and @samp{>}. +Output from the command is preceded by the glyph ``@print{}''. +This typically represents the command's standard output. +Error messages, and other output on the command's standard error, are preceded +by the glyph ``@error{}''. For example: + +@example +@group +$ echo hi on stdout +@print{} hi on stdout +$ echo hello on stderr 1>&2 +@error{} hello on stderr +@end group +@end example + +@iftex +In the text, command names appear in @code{this font}, while code segments +appear in the same font and quoted, @samp{like this}. Some things will +be emphasized @emph{like this}, and if a point needs to be made +strongly, it will be done @strong{like this}. The first occurrence of +a new term is usually its @dfn{definition}, and appears in the same +font as the previous occurrence of ``definition'' in this sentence. +File names are indicated like this: @file{/path/to/ourfile}. +@end iftex + +Characters that you type at the keyboard look @kbd{like this}. 
In particular, +there are special characters called ``control characters.'' These are +characters that you type by holding down both the @kbd{CONTROL} key and +another key, at the same time. For example, a @kbd{Control-d} is typed +by first pressing and holding the @kbd{CONTROL} key, next +pressing the @kbd{d} key, and finally releasing both keys. + +@node Sample Data Files, , Conventions, What Is Awk +@section Data Files for the Examples + +@cindex input file, sample +@cindex sample input file +@cindex @file{BBS-list} file +Many of the examples in this @value{DOCUMENT} take their input from two sample +data files. The first, called @file{BBS-list}, represents a list of +computer bulletin board systems together with information about those systems. +The second data file, called @file{inventory-shipped}, contains +information about shipments on a monthly basis. In both files, +each line is considered to be one @dfn{record}. + +In the file @file{BBS-list}, each record contains the name of a computer +bulletin board, its phone number, the board's baud rate(s), and a code for +the number of hours it is operational. An @samp{A} in the last column +means the board operates 24 hours a day. A @samp{B} in the last +column means the board operates evening and weekend hours, only. A +@samp{C} means the board operates only on weekends. 
+ +@c 2e: Update the baud rates to reflect today's faster modems +@example +@c system mkdir eg +@c system mkdir eg/lib +@c system mkdir eg/data +@c system mkdir eg/prog +@c system mkdir eg/misc +@c file eg/data/BBS-list +aardvark 555-5553 1200/300 B +alpo-net 555-3412 2400/1200/300 A +barfly 555-7685 1200/300 A +bites 555-1675 2400/1200/300 A +camelot 555-0542 300 C +core 555-2912 1200/300 C +fooey 555-1234 2400/1200/300 B +foot 555-6699 1200/300 B +macfoo 555-6480 1200/300 A +sdace 555-3430 2400/1200/300 A +sabafoo 555-2127 1200/300 C +@c endfile +@end example + +@cindex @file{inventory-shipped} file +The second data file, called @file{inventory-shipped}, represents +information about shipments during the year. +Each record contains the month of the year, the number +of green crates shipped, the number of red boxes shipped, the number of +orange bags shipped, and the number of blue packages shipped, +respectively. There are 16 entries, covering the 12 months of one year +and four months of the next year. + +@example +@c file eg/data/inventory-shipped +Jan 13 25 15 115 +Feb 15 32 24 226 +Mar 15 24 34 228 +Apr 31 52 63 420 +May 16 34 29 208 +Jun 31 42 75 492 +Jul 24 34 67 436 +Aug 15 34 47 316 +Sep 13 55 37 277 +Oct 29 54 68 525 +Nov 20 87 82 577 +Dec 17 35 61 401 + +Jan 21 36 64 620 +Feb 26 58 80 652 +Mar 24 75 70 495 +Apr 21 70 74 514 +@c endfile +@end example + +@ifinfo +If you are reading this in GNU Emacs using Info, you can copy the regions +of text showing these sample files into your own test files. This way you +can try out the examples shown in the remainder of this document. You do +this by using the command @kbd{M-x write-region} to copy text from the Info +file into a file for use with @code{awk} +(@xref{Misc File Ops, , Miscellaneous File Operations, emacs, GNU Emacs Manual}, +for more information). Using this information, create your own +@file{BBS-list} and @file{inventory-shipped} files, and practice what you +learn in this @value{DOCUMENT}. 
+ +If you are using the stand-alone version of Info, +see @ref{Extract Program, ,Extracting Programs from Texinfo Source Files}, +for an @code{awk} program that will extract these data files from +@file{gawk.texi}, the Texinfo source file for this Info file. +@end ifinfo + +@node Getting Started, One-liners, What Is Awk, Top +@chapter Getting Started with @code{awk} +@cindex script, definition of +@cindex rule, definition of +@cindex program, definition of +@cindex basic function of @code{awk} + +The basic function of @code{awk} is to search files for lines (or other +units of text) that contain certain patterns. When a line matches one +of the patterns, @code{awk} performs specified actions on that line. +@code{awk} keeps processing input lines in this way until the end of the +input files are reached. + +@cindex data-driven languages +@cindex procedural languages +@cindex language, data-driven +@cindex language, procedural +Programs in @code{awk} are different from programs in most other languages, +because @code{awk} programs are @dfn{data-driven}; that is, you describe +the data you wish to work with, and then what to do when you find it. +Most other languages are @dfn{procedural}; you have to describe, in great +detail, every step the program is to take. When working with procedural +languages, it is usually much +harder to clearly describe the data your program will process. +For this reason, @code{awk} programs are often refreshingly easy to both +write and read. + +@cindex program, definition of +@cindex rule, definition of +When you run @code{awk}, you specify an @code{awk} @dfn{program} that +tells @code{awk} what to do. The program consists of a series of +@dfn{rules}. (It may also contain @dfn{function definitions}, +an advanced feature which we will ignore for now. +@xref{User-defined, ,User-defined Functions}.) Each rule specifies one +pattern to search for, and one action to perform when that pattern is found. 
+ +Syntactically, a rule consists of a pattern followed by an action. The +action is enclosed in curly braces to separate it from the pattern. +Rules are usually separated by newlines. Therefore, an @code{awk} +program looks like this: + +@example +@var{pattern} @{ @var{action} @} +@var{pattern} @{ @var{action} @} +@dots{} +@end example + +@menu +* Names:: What name to use to find @code{awk}. +* Running gawk:: How to run @code{gawk} programs; includes + command line syntax. +* Very Simple:: A very simple example. +* Two Rules:: A less simple one-line example with two rules. +* More Complex:: A more complex example. +* Statements/Lines:: Subdividing or combining statements into + lines. +* Other Features:: Other Features of @code{awk}. +* When:: When to use @code{gawk} and when to use other + things. +@end menu + +@node Names, Running gawk , Getting Started, Getting Started +@section A Rose By Any Other Name + +@cindex old @code{awk} vs. new @code{awk} +@cindex new @code{awk} vs. old @code{awk} +The @code{awk} language has evolved over the years. Full details are +provided in @ref{Language History, ,The Evolution of the @code{awk} Language}. +The language described in this @value{DOCUMENT} +is often referred to as ``new @code{awk}.'' + +Because of this, many systems have multiple +versions of @code{awk}. +Some systems have an @code{awk} utility that implements the +original version of the @code{awk} language, and a @code{nawk} utility +for the new version. Others have an @code{oawk} for the ``old @code{awk}'' +language, and plain @code{awk} for the new one. Still others only +have one version, usually the new one.@footnote{Often, these systems +use @code{gawk} for their @code{awk} implementation!} + +All in all, this makes it difficult for you to know which version of +@code{awk} you should run when writing your programs. The best advice +we can give here is to check your local documentation. 
Look for @code{awk}, +@code{oawk}, and @code{nawk}, as well as for @code{gawk}. Chances are, you +will have some version of new @code{awk} on your system, and that is what +you should use when running your programs. (Of course, if you're reading +this @value{DOCUMENT}, chances are good that you have @code{gawk}!) + +Throughout this @value{DOCUMENT}, whenever we refer to a language feature +that should be available in any complete implementation of POSIX @code{awk}, +we simply use the term @code{awk}. When referring to a feature that is +specific to the GNU implementation, we use the term @code{gawk}. + +@node Running gawk, Very Simple, Names, Getting Started +@section How to Run @code{awk} Programs + +@cindex command line formats +@cindex running @code{awk} programs +There are several ways to run an @code{awk} program. If the program is +short, it is easiest to include it in the command that runs @code{awk}, +like this: + +@example +awk '@var{program}' @var{input-file1} @var{input-file2} @dots{} +@end example + +@noindent +where @var{program} consists of a series of patterns and actions, as +described earlier. +(The reason for the single quotes is described below, in +@ref{One-shot, ,One-shot Throw-away @code{awk} Programs}.) + +When the program is long, it is usually more convenient to put it in a file +and run it with a command like this: + +@example +awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{} +@end example + +@menu +* One-shot:: Running a short throw-away @code{awk} program. +* Read Terminal:: Using no input files (input from terminal + instead). +* Long:: Putting permanent @code{awk} programs in + files. +* Executable Scripts:: Making self-contained @code{awk} programs. +* Comments:: Adding documentation to @code{gawk} programs. 
+@end menu + +@node One-shot, Read Terminal, Running gawk, Running gawk +@subsection One-shot Throw-away @code{awk} Programs + +Once you are familiar with @code{awk}, you will often type in simple +programs the moment you want to use them. Then you can write the +program as the first argument of the @code{awk} command, like this: + +@example +awk '@var{program}' @var{input-file1} @var{input-file2} @dots{} +@end example + +@noindent +where @var{program} consists of a series of @var{patterns} and +@var{actions}, as described earlier. + +@cindex single quotes, why needed +This command format instructs the @dfn{shell}, or command interpreter, +to start @code{awk} and use the @var{program} to process records in the +input file(s). There are single quotes around @var{program} so that +the shell doesn't interpret any @code{awk} characters as special shell +characters. They also cause the shell to treat all of @var{program} as +a single argument for @code{awk} and allow @var{program} to be more +than one line long. + +This format is also useful for running short or medium-sized @code{awk} +programs from shell scripts, because it avoids the need for a separate +file for the @code{awk} program. A self-contained shell script is more +reliable since there are no other files to misplace. + +@ref{One-liners, , Useful One Line Programs}, presents several short, +self-contained programs. + +As an interesting side point, the command + +@example +awk '/foo/' @var{files} @dots{} +@end example + +@noindent +is essentially the same as + +@cindex @code{egrep} +@example +egrep foo @var{files} @dots{} +@end example + +@node Read Terminal, Long, One-shot, Running gawk +@subsection Running @code{awk} without Input Files + +@cindex standard input +@cindex input, standard +You can also run @code{awk} without any input files. 
If you type the +command line: + +@example +awk '@var{program}' +@end example + +@noindent +then @code{awk} applies the @var{program} to the @dfn{standard input}, +which usually means whatever you type on the terminal. This continues +until you indicate end-of-file by typing @kbd{Control-d}. +(On other operating systems, the end-of-file character may be different. +For example, on OS/2 and MS-DOS, it is @kbd{Control-z}.) + +For example, the following program prints a friendly piece of advice +(from Douglas Adams' @cite{The Hitchhiker's Guide to the Galaxy}), +to keep you from worrying about the complexities of computer programming +(@samp{BEGIN} is a feature we haven't discussed yet). + +@example +$ awk "BEGIN @{ print \"Don't Panic!\" @}" +@print{} Don't Panic! +@end example + +@cindex quoting, shell +@cindex shell quoting +This program does not read any input. The @samp{\} before each of the +inner double quotes is necessary because of the shell's quoting rules, +in particular because it mixes both single quotes and double quotes. + +This next simple @code{awk} program +emulates the @code{cat} utility; it copies whatever you type at the +keyboard to its standard output. (Why this works is explained shortly.) + +@example +$ awk '@{ print @}' +Now is the time for all good men +@print{} Now is the time for all good men +to come to the aid of their country. +@print{} to come to the aid of their country. +Four score and seven years ago, ... +@print{} Four score and seven years ago, ... +What, me worry? +@print{} What, me worry? +@kbd{Control-d} +@end example + +@node Long, Executable Scripts, Read Terminal, Running gawk +@subsection Running Long Programs + +@cindex running long programs +@cindex @code{-f} option +@cindex program file +@cindex file, @code{awk} program +Sometimes your @code{awk} programs can be very long. In this case it is +more convenient to put the program into a separate file. 
To tell +@code{awk} to use that file for its program, you type: + +@example +awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{} +@end example + +The @samp{-f} instructs the @code{awk} utility to get the @code{awk} program +from the file @var{source-file}. Any file name can be used for +@var{source-file}. For example, you could put the program: + +@example +BEGIN @{ print "Don't Panic!" @} +@end example + +@noindent +into the file @file{advice}. Then this command: + +@example +awk -f advice +@end example + +@noindent +does the same thing as this one: + +@example +awk "BEGIN @{ print \"Don't Panic!\" @}" +@end example + +@cindex quoting, shell +@cindex shell quoting +@noindent +which was explained earlier (@pxref{Read Terminal, ,Running @code{awk} without Input Files}). +Note that you don't usually need single quotes around the file name that you +specify with @samp{-f}, because most file names don't contain any of the shell's +special characters. Notice that in @file{advice}, the @code{awk} +program did not have single quotes around it. The quotes are only needed +for programs that are provided on the @code{awk} command line. + +If you want to identify your @code{awk} program files clearly as such, +you can add the extension @file{.awk} to the file name. This doesn't +affect the execution of the @code{awk} program, but it does make +``housekeeping'' easier. + +@node Executable Scripts, Comments, Long, Running gawk +@subsection Executable @code{awk} Programs +@cindex executable scripts +@cindex scripts, executable +@cindex self contained programs +@cindex program, self contained +@cindex @code{#!} (executable scripts) + +Once you have learned @code{awk}, you may want to write self-contained +@code{awk} scripts, using the @samp{#!} script mechanism. 
You can do +this on many Unix systems@footnote{The @samp{#!} mechanism works on +Linux systems, +Unix systems derived from Berkeley Unix, System V Release 4, and some System +V Release 3 systems.} (and someday on the GNU system). + +For example, you could update the file @file{advice} to look like this: + +@example +#! /bin/awk -f + +BEGIN @{ print "Don't Panic!" @} +@end example + +@noindent +After making this file executable (with the @code{chmod} utility), you +can simply type @samp{advice} +at the shell, and the system will arrange to run @code{awk}@footnote{The +line beginning with @samp{#!} lists the full file name of an interpreter +to be run, and an optional initial command line argument to pass to that +interpreter. The operating system then runs the interpreter with the given +argument and the full argument list of the executed program. The first argument +in the list is the full file name of the @code{awk} program. The rest of the +argument list will either be options to @code{awk}, or data files, +or both.} as if you had typed @samp{awk -f advice}. + +@example +@group +$ advice +@print{} Don't Panic! +@end group +@end example + +@noindent +Self-contained @code{awk} scripts are useful when you want to write a +program which users can invoke without their having to know that the program is +written in @code{awk}. + +@cindex shell scripts +@cindex scripts, shell +Some older systems do not support the @samp{#!} mechanism. You can get a +similar effect using a regular shell script. It would look something +like this: + +@example +: The colon ensures execution by the standard shell. +awk '@var{program}' "$@@" +@end example + +Using this technique, it is @emph{vital} to enclose the @var{program} in +single quotes to protect it from interpretation by the shell. If you +omit the quotes, only a shell wizard can predict the results. + +The @code{"$@@"} causes the shell to forward all the command line +arguments to the @code{awk} program, without interpretation. 
The first +line, which starts with a colon, is used so that this shell script will +work even if invoked by a user who uses the C shell. (Not all older systems +obey this convention, but many do.) +@c 2e: +@c Someday: (See @cite{The Bourne Again Shell}, by ??.) + +@node Comments, , Executable Scripts, Running gawk +@subsection Comments in @code{awk} Programs +@cindex @code{#} (comment) +@cindex comments +@cindex use of comments +@cindex documenting @code{awk} programs +@cindex programs, documenting + +A @dfn{comment} is some text that is included in a program for the sake +of human readers; it is not really part of the program. Comments +can explain what the program does, and how it works. Nearly all +programming languages have provisions for comments, because programs are +typically hard to understand without their extra help. + +In the @code{awk} language, a comment starts with the sharp sign +character, @samp{#}, and continues to the end of the line. +The @samp{#} does not have to be the first character on the line. The +@code{awk} language ignores the rest of a line following a sharp sign. +For example, we could have put the following into @file{advice}: + +@example +# This program prints a nice friendly message. It helps +# keep novice users from being afraid of the computer. +BEGIN @{ print "Don't Panic!" @} +@end example + +You can put comment lines into keyboard-composed throw-away @code{awk} +programs also, but this usually isn't very useful; the purpose of a +comment is to help you or another person understand the program at +a later time. + +@node Very Simple, Two Rules, Running gawk, Getting Started +@section A Very Simple Example + +The following command runs a simple @code{awk} program that searches the +input file @file{BBS-list} for the string of characters: @samp{foo}. (A +string of characters is usually called a @dfn{string}. 
+
+The term @dfn{string} is perhaps based on similar usage in English, such
+as ``a string of pearls,'' or, ``a string of cars in a train.'')
+
+@example
+awk '/foo/ @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+When lines containing @samp{foo} are found, they are printed, because
+@w{@samp{print $0}} means print the current line. (Just @samp{print} by
+itself means the same thing, so we could have written that
+instead.)
+
+You will notice that slashes, @samp{/}, surround the string @samp{foo}
+in the @code{awk} program. The slashes indicate that @samp{foo}
+is a pattern to search for. This type of pattern is called a
+@dfn{regular expression}, and is covered in more detail later
+(@pxref{Regexp, ,Regular Expressions}).
+The pattern is allowed to match parts of words.
+There are
+single quotes around the @code{awk} program so that the shell won't
+interpret any of it as special shell characters.
+
+Here is what this program prints:
+
+@example
+@group
+$ awk '/foo/ @{ print $0 @}' BBS-list
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+@cindex action, default
+@cindex pattern, default
+@cindex default action
+@cindex default pattern
+In an @code{awk} rule, either the pattern or the action can be omitted,
+but not both. If the pattern is omitted, then the action is performed
+for @emph{every} input line. If the action is omitted, the default
+action is to print all lines that match the pattern.
+
+@cindex empty action
+@cindex action, empty
+Thus, we could leave out the action (the @code{print} statement and the curly
+braces) in the above example, and the result would be the same: all
+lines matching the pattern @samp{foo} would be printed. By comparison,
+omitting the @code{print} statement but retaining the curly braces makes an
+empty action that does nothing; then no lines would be printed.
+ +@node Two Rules, More Complex, Very Simple, Getting Started +@section An Example with Two Rules +@cindex how @code{awk} works + +The @code{awk} utility reads the input files one line at a +time. For each line, @code{awk} tries the patterns of each of the rules. +If several patterns match then several actions are run, in the order in +which they appear in the @code{awk} program. If no patterns match, then +no actions are run. + +After processing all the rules (perhaps none) that match the line, +@code{awk} reads the next line (however, +@pxref{Next Statement, ,The @code{next} Statement}, +and also @pxref{Nextfile Statement, ,The @code{nextfile} Statement}). +This continues until the end of the file is reached. + +For example, the @code{awk} program: + +@example +/12/ @{ print $0 @} +/21/ @{ print $0 @} +@end example + +@noindent +contains two rules. The first rule has the string @samp{12} as the +pattern and @samp{print $0} as the action. The second rule has the +string @samp{21} as the pattern and also has @samp{print $0} as the +action. Each rule's action is enclosed in its own pair of braces. + +This @code{awk} program prints every line that contains the string +@samp{12} @emph{or} the string @samp{21}. If a line contains both +strings, it is printed twice, once by each rule. 
+ +This is what happens if we run this program on our two sample data files, +@file{BBS-list} and @file{inventory-shipped}, as shown here: + +@example +$ awk '/12/ @{ print $0 @} +> /21/ @{ print $0 @}' BBS-list inventory-shipped +@print{} aardvark 555-5553 1200/300 B +@print{} alpo-net 555-3412 2400/1200/300 A +@print{} barfly 555-7685 1200/300 A +@print{} bites 555-1675 2400/1200/300 A +@print{} core 555-2912 1200/300 C +@print{} fooey 555-1234 2400/1200/300 B +@print{} foot 555-6699 1200/300 B +@print{} macfoo 555-6480 1200/300 A +@print{} sdace 555-3430 2400/1200/300 A +@print{} sabafoo 555-2127 1200/300 C +@print{} sabafoo 555-2127 1200/300 C +@print{} Jan 21 36 64 620 +@print{} Apr 21 70 74 514 +@end example + +@noindent +Note how the line in @file{BBS-list} beginning with @samp{sabafoo} +was printed twice, once for each rule. + +@node More Complex, Statements/Lines, Two Rules, Getting Started +@section A More Complex Example + +@ignore +We have to use ls -lg here to get portable output across Unix systems. +The POSIX ls matches this behavior too. Sigh. +@end ignore +Here is an example to give you an idea of what typical @code{awk} +programs do. This example shows how @code{awk} can be used to +summarize, select, and rearrange the output of another utility. It uses +features that haven't been covered yet, so don't worry if you don't +understand all the details. + +@example +ls -lg | awk '$6 == "Nov" @{ sum += $5 @} + END @{ print sum @}' +@end example + +@cindex @code{csh}, backslash continuation +@cindex backslash continuation in @code{csh} +This command prints the total number of bytes in all the files in the +current directory that were last modified in November (of any year). +(In the C shell you would need to type a semicolon and then a backslash +at the end of the first line; in a POSIX-compliant shell, such as the +Bourne shell or Bash, the GNU Bourne-Again shell, you can type the example +as shown.) 
+@ignore +FIXME: how can users tell what shell they are running? Need a footnote +or something, but getting into this is a distraction. +@end ignore + +The @w{@samp{ls -lg}} part of this example is a system command that gives +you a listing of the files in a directory, including file size and the date +the file was last modified. Its output looks like this: + +@example +-rw-r--r-- 1 arnold user 1933 Nov 7 13:05 Makefile +-rw-r--r-- 1 arnold user 10809 Nov 7 13:03 gawk.h +-rw-r--r-- 1 arnold user 983 Apr 13 12:14 gawk.tab.h +-rw-r--r-- 1 arnold user 31869 Jun 15 12:20 gawk.y +-rw-r--r-- 1 arnold user 22414 Nov 7 13:03 gawk1.c +-rw-r--r-- 1 arnold user 37455 Nov 7 13:03 gawk2.c +-rw-r--r-- 1 arnold user 27511 Dec 9 13:07 gawk3.c +-rw-r--r-- 1 arnold user 7989 Nov 7 13:03 gawk4.c +@end example + +@noindent +The first field contains read-write permissions, the second field contains +the number of links to the file, and the third field identifies the owner of +the file. The fourth field identifies the group of the file. +The fifth field contains the size of the file in bytes. The +sixth, seventh and eighth fields contain the month, day, and time, +respectively, that the file was last modified. Finally, the ninth field +contains the name of the file. + +@cindex automatic initialization +@cindex initialization, automatic +The @samp{$6 == "Nov"} in our @code{awk} program is an expression that +tests whether the sixth field of the output from @w{@samp{ls -lg}} +matches the string @samp{Nov}. Each time a line has the string +@samp{Nov} for its sixth field, the action @samp{sum += $5} is +performed. This adds the fifth field (the file size) to the variable +@code{sum}. As a result, when @code{awk} has finished reading all the +input lines, @code{sum} is the sum of the sizes of files whose +lines matched the pattern. (This works because @code{awk} variables +are automatically initialized to zero.) 
+ +After the last line of output from @code{ls} has been processed, the +@code{END} rule is executed, and the value of @code{sum} is +printed. In this example, the value of @code{sum} would be 80600. + +These more advanced @code{awk} techniques are covered in later sections +(@pxref{Action Overview, ,Overview of Actions}). Before you can move on to more +advanced @code{awk} programming, you have to know how @code{awk} interprets +your input and displays your output. By manipulating fields and using +@code{print} statements, you can produce some very useful and impressive +looking reports. + +@node Statements/Lines, Other Features, More Complex, Getting Started +@section @code{awk} Statements Versus Lines +@cindex line break +@cindex newline + +Most often, each line in an @code{awk} program is a separate statement or +separate rule, like this: + +@example +awk '/12/ @{ print $0 @} + /21/ @{ print $0 @}' BBS-list inventory-shipped +@end example + +However, @code{gawk} will ignore newlines after any of the following: + +@example +, @{ ? : || && do else +@end example + +@noindent +A newline at any other point is considered the end of the statement. +(Splitting lines after @samp{?} and @samp{:} is a minor @code{gawk} +extension. The @samp{?} and @samp{:} referred to here is the +three operand conditional expression described in +@ref{Conditional Exp, ,Conditional Expressions}.) + +@cindex backslash continuation +@cindex continuation of lines +@cindex line continuation +If you would like to split a single statement into two lines at a point +where a newline would terminate it, you can @dfn{continue} it by ending the +first line with a backslash character, @samp{\}. The backslash must be +the final character on the line to be recognized as a continuation +character. This is allowed absolutely anywhere in the statement, even +in the middle of a string or regular expression. 
For example:
+
+@example
+awk '/This regular expression is too long, so continue it\
+ on the next line/ @{ print $1 @}'
+@end example
+
+@noindent
+@cindex portability issues
+We have generally not used backslash continuation in the sample programs
+in this @value{DOCUMENT}. Since in @code{gawk} there is no limit on the
+length of a line, it is never strictly necessary; it just makes programs
+more readable. For this same reason, as well as for clarity, we have
+kept most statements short in the sample programs presented throughout
+the @value{DOCUMENT}. Backslash continuation is most useful when your
+@code{awk} program is in a separate source file, instead of typed in on
+the command line. You should also note that many @code{awk}
+implementations are more particular about where you may use backslash
+continuation. For example, they may not allow you to split a string
+constant using backslash continuation. Thus, for maximal portability of
+your @code{awk} programs, it is best not to split your lines in the
+middle of a regular expression or a string.
+
+@cindex @code{csh}, backslash continuation
+@cindex backslash continuation in @code{csh}
+@strong{Caution: backslash continuation does not work as described above
+with the C shell.} Continuation with backslash works for @code{awk}
+programs in files, and also for one-shot programs @emph{provided} you
+are using a POSIX-compliant shell, such as the Bourne shell or Bash, the
+GNU Bourne-Again shell. But the C shell (@code{csh}) behaves
+differently! There, you must use two backslashes in a row, followed by
+a newline. Note also that when using the C shell, @emph{every} newline
+in your @code{awk} program must be escaped with a backslash. To illustrate:
+
+@example
+% awk 'BEGIN @{ \
+? print \\
+? "hello, world" \
+? @}'
+@print{} hello, world
+@end example
+
+@noindent
+Here, the @samp{%} and @samp{?} are the C shell's primary and secondary
+prompts, analogous to the standard shell's @samp{$} and @samp{>}.
+ +@code{awk} is a line-oriented language. Each rule's action has to +begin on the same line as the pattern. To have the pattern and action +on separate lines, you @emph{must} use backslash continuation---there +is no other way. + +@cindex backslash continuation and comments +@cindex comments and backslash continuation +Note that backslash continuation and comments do not mix. As soon +as @code{awk} sees the @samp{#} that starts a comment, it ignores +@emph{everything} on the rest of the line. For example: + +@example +@group +$ gawk 'BEGIN @{ print "dont panic" # a friendly \ +> BEGIN rule +> @}' +@error{} gawk: cmd. line:2: BEGIN rule +@error{} gawk: cmd. line:2: ^ parse error +@end group +@end example + +@noindent +Here, it looks like the backslash would continue the comment onto the +next line. However, the backslash-newline combination is never even +noticed, since it is ``hidden'' inside the comment. Thus, the +@samp{BEGIN} is noted as a syntax error. + +@cindex multiple statements on one line +When @code{awk} statements within one rule are short, you might want to put +more than one of them on a line. You do this by separating the statements +with a semicolon, @samp{;}. + +This also applies to the rules themselves. +Thus, the previous program could have been written: + +@example +/12/ @{ print $0 @} ; /21/ @{ print $0 @} +@end example + +@noindent +@strong{Note:} the requirement that rules on the same line must be +separated with a semicolon was not in the original @code{awk} +language; it was added for consistency with the treatment of statements +within an action. + +@node Other Features, When, Statements/Lines, Getting Started +@section Other Features of @code{awk} + +The @code{awk} language provides a number of predefined, or built-in variables, which +your programs can use to get information from @code{awk}. There are other +variables your program can set to control how @code{awk} processes your +data. 
+ +In addition, @code{awk} provides a number of built-in functions for doing +common computational and string related operations. + +As we develop our presentation of the @code{awk} language, we introduce +most of the variables and many of the functions. They are defined +systematically in @ref{Built-in Variables}, and +@ref{Built-in, ,Built-in Functions}. + +@node When, , Other Features, Getting Started +@section When to Use @code{awk} + +@cindex when to use @code{awk} +@cindex applications of @code{awk} +You might wonder how @code{awk} might be useful for you. Using +utility programs, advanced patterns, field separators, arithmetic +statements, and other selection criteria, you can produce much more +complex output. The @code{awk} language is very useful for producing +reports from large amounts of raw data, such as summarizing information +from the output of other utility programs like @code{ls}. +(@xref{More Complex, ,A More Complex Example}.) + +Programs written with @code{awk} are usually much smaller than they would +be in other languages. This makes @code{awk} programs easy to compose and +use. Often, @code{awk} programs can be quickly composed at your terminal, +used once, and thrown away. Since @code{awk} programs are interpreted, you +can avoid the (usually lengthy) compilation part of the typical +edit-compile-test-debug cycle of software development. + +Complex programs have been written in @code{awk}, including a complete +retargetable assembler for eight-bit microprocessors (@pxref{Glossary}, for +more information) and a microcode assembler for a special purpose Prolog +computer. However, @code{awk}'s capabilities are strained by tasks of +such complexity. + +If you find yourself writing @code{awk} scripts of more than, say, a few +hundred lines, you might consider using a different programming +language. Emacs Lisp is a good choice if you need sophisticated string +or pattern matching capabilities. 
The shell is also good at string and +pattern matching; in addition, it allows powerful use of the system +utilities. More conventional languages, such as C, C++, and Lisp, offer +better facilities for system programming and for managing the complexity +of large programs. Programs in these languages may require more lines +of source code than the equivalent @code{awk} programs, but they are +easier to maintain and usually run more efficiently. + +@node One-liners, Regexp, Getting Started, Top +@chapter Useful One Line Programs + +@cindex one-liners +Many useful @code{awk} programs are short, just a line or two. Here is a +collection of useful, short programs to get you started. Some of these +programs contain constructs that haven't been covered yet. The description +of the program will give you a good idea of what is going on, but please +read the rest of the @value{DOCUMENT} to become an @code{awk} expert! + +Most of the examples use a data file named @file{data}. This is just a +placeholder; if you were to use these programs yourself, you would substitute +your own file names for @file{data}. + +@ifinfo +Since you are reading this in Info, each line of the example code is +enclosed in quotes, to represent text that you would type literally. +The examples themselves represent shell commands that use single quotes +to keep the shell from interpreting the contents of the program. +When reading the examples, focus on the text between the open and close +quotes. +@end ifinfo + +@table @code +@item awk '@{ if (length($0) > max) max = length($0) @} +@itemx @ @ @ @ @ END @{ print max @}' data +This program prints the length of the longest input line. + +@item awk 'length($0) > 80' data +This program prints every line that is longer than 80 characters. The sole +rule has a relational expression as its pattern, and has no action (so the +default action, printing the record, is used). 
+ +@item expand@ data@ |@ awk@ '@{ if (x < length()) x = length() @} +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "maximum line length is " x @}' +This program prints the length of the longest line in @file{data}. The input +is processed by the @code{expand} program to change tabs into spaces, +so the widths compared are actually the right-margin columns. + +@item awk 'NF > 0' data +This program prints every line that has at least one field. This is an +easy way to delete blank lines from a file (or rather, to create a new +file similar to the old file but from which the blank lines have been +deleted). + +@c Karl Berry points out that new users probably don't want to see +@c multiple ways to do things, just the `best' way. He's probably +@c right. At some point it might be worth adding something about there +@c often being multiple ways to do things in awk, but for now we'll +@c just take this one out. +@ignore +@item awk '@{ if (NF > 0) print @}' data +This program also prints every line that has at least one field. Here we +allow the rule to match every line, and then decide in the action whether +to print. +@end ignore + +@item awk@ 'BEGIN@ @{@ for (i = 1; i <= 7; i++) +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ print int(101 * rand()) @}' +This program prints seven random numbers from zero to 100, inclusive. + +@item ls -lg @var{files} | awk '@{ x += $5 @} ; END @{ print "total bytes: " x @}' +This program prints the total number of bytes used by @var{files}. + +@item ls -lg @var{files} | awk '@{ x += $5 @} +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "total K-bytes: " (x + 1023)/1024 @}' +This program prints the total number of kilobytes used by @var{files}. + +@item awk -F: '@{ print $1 @}' /etc/passwd | sort +This program prints a sorted list of the login names of all users. + +@item awk 'END @{ print NR @}' data +This program counts lines in a file. 
+ +@item awk 'NR % 2 == 0' data +This program prints the even numbered lines in the data file. +If you were to use the expression @samp{NR % 2 == 1} instead, +it would print the odd numbered lines. +@end table + +@node Regexp, Reading Files, One-liners, Top +@chapter Regular Expressions +@cindex pattern, regular expressions +@cindex regexp +@cindex regular expression +@cindex regular expressions as patterns + +A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a +set of strings. +Because regular expressions are such a fundamental part of @code{awk} +programming, their format and use deserve a separate chapter. + +A regular expression enclosed in slashes (@samp{/}) +is an @code{awk} pattern that matches every input record whose text +belongs to that set. + +The simplest regular expression is a sequence of letters, numbers, or +both. Such a regexp matches any string that contains that sequence. +Thus, the regexp @samp{foo} matches any string containing @samp{foo}. +Therefore, the pattern @code{/foo/} matches any input record containing +the three characters @samp{foo}, @emph{anywhere} in the record. Other +kinds of regexps let you specify more complicated classes of strings. + +@iftex +Initially, the examples will be simple. As we explain more about how +regular expressions work, we will present more complicated examples. +@end iftex + +@menu +* Regexp Usage:: How to Use Regular Expressions. +* Escape Sequences:: How to write non-printing characters. +* Regexp Operators:: Regular Expression Operators. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. +* Leftmost Longest:: How much text matches. +* Computed Regexps:: Using Dynamic Regexps. +@end menu + +@node Regexp Usage, Escape Sequences, Regexp, Regexp +@section How to Use Regular Expressions + +A regular expression can be used as a pattern by enclosing it in +slashes. 
Then the regular expression is tested against the +entire text of each record. (Normally, it only needs +to match some part of the text in order to succeed.) For example, this +prints the second field of each record that contains the three +characters @samp{foo} anywhere in it: + +@example +@group +$ awk '/foo/ @{ print $2 @}' BBS-list +@print{} 555-1234 +@print{} 555-6699 +@print{} 555-6480 +@print{} 555-2127 +@end group +@end example + +@cindex regexp matching operators +@cindex string-matching operators +@cindex operators, string-matching +@cindex operators, regexp matching +@cindex regexp match/non-match operators +@cindex @code{~} operator +@cindex @code{!~} operator +Regular expressions can also be used in matching expressions. These +expressions allow you to specify the string to match against; it need +not be the entire current input record. The two operators, @samp{~} +and @samp{!~}, perform regular expression comparisons. Expressions +using these operators can be used as patterns or in @code{if}, +@code{while}, @code{for}, and @code{do} statements. +@ifinfo +@c adding this xref in TeX screws up the formatting too much +(@xref{Statements, ,Control Statements in Actions}.) +@end ifinfo + +@table @code +@item @var{exp} ~ /@var{regexp}/ +This is true if the expression @var{exp} (taken as a string) +is matched by @var{regexp}. The following example matches, or selects, +all input records with the upper-case letter @samp{J} somewhere in the +first field: + +@example +@group +$ awk '$1 ~ /J/' inventory-shipped +@print{} Jan 13 25 15 115 +@print{} Jun 31 42 75 492 +@print{} Jul 24 34 67 436 +@print{} Jan 21 36 64 620 +@end group +@end example + +So does this: + +@example +awk '@{ if ($1 ~ /J/) print @}' inventory-shipped +@end example + +@item @var{exp} !~ /@var{regexp}/ +This is true if the expression @var{exp} (taken as a character string) +is @emph{not} matched by @var{regexp}. 
The following example matches, +or selects, all input records whose first field @emph{does not} contain +the upper-case letter @samp{J}: + +@example +@group +$ awk '$1 !~ /J/' inventory-shipped +@print{} Feb 15 32 24 226 +@print{} Mar 15 24 34 228 +@print{} Apr 31 52 63 420 +@print{} May 16 34 29 208 +@dots{} +@end group +@end example +@end table + +@cindex regexp constant +When a regexp is written enclosed in slashes, like @code{/foo/}, we call it +a @dfn{regexp constant}, much like @code{5.27} is a numeric constant, and +@code{"foo"} is a string constant. + +@node Escape Sequences, Regexp Operators, Regexp Usage, Regexp +@section Escape Sequences + +@cindex escape sequence notation +Some characters cannot be included literally in string constants +(@code{"foo"}) or regexp constants (@code{/foo/}). You represent them +instead with @dfn{escape sequences}, which are character sequences +beginning with a backslash (@samp{\}). + +One use of an escape sequence is to include a double-quote character in +a string constant. Since a plain double-quote would end the string, you +must use @samp{\"} to represent an actual double-quote character as a +part of the string. For example: + +@example +$ awk 'BEGIN @{ print "He said \"hi!\" to her." @}' +@print{} He said "hi!" to her. +@end example + +The backslash character itself is another character that cannot be +included normally; you write @samp{\\} to put one backslash in the +string or regexp. Thus, the string whose contents are the two characters +@samp{"} and @samp{\} must be written @code{"\"\\"}. + +Another use of backslash is to represent unprintable characters +such as tab or newline. While there is nothing to stop you from entering most +unprintable characters directly in a string constant or regexp constant, +they may look ugly. + +Here is a table of all the escape sequences used in @code{awk}, and +what they represent. 
Unless noted otherwise, all of these escape +sequences apply to both string constants and regexp constants. + +@c @cartouche +@table @code +@item \\ +A literal backslash, @samp{\}. + +@cindex @code{awk} language, V.4 version +@item \a +The ``alert'' character, @kbd{Control-g}, ASCII code 7 (BEL). + +@item \b +Backspace, @kbd{Control-h}, ASCII code 8 (BS). + +@item \f +Formfeed, @kbd{Control-l}, ASCII code 12 (FF). + +@item \n +Newline, @kbd{Control-j}, ASCII code 10 (LF). + +@item \r +Carriage return, @kbd{Control-m}, ASCII code 13 (CR). + +@item \t +Horizontal tab, @kbd{Control-i}, ASCII code 9 (HT). + +@cindex @code{awk} language, V.4 version +@item \v +Vertical tab, @kbd{Control-k}, ASCII code 11 (VT). + +@item \@var{nnn} +The octal value @var{nnn}, where @var{nnn} are one to three digits +between @samp{0} and @samp{7}. For example, the code for the ASCII ESC +(escape) character is @samp{\033}. + +@cindex @code{awk} language, V.4 version +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@item \x@var{hh}@dots{} +The hexadecimal value @var{hh}, where @var{hh} are hexadecimal +digits (@samp{0} through @samp{9} and either @samp{A} through @samp{F} or +@samp{a} through @samp{f}). Like the same construct in ANSI C, the escape +sequence continues until the first non-hexadecimal digit is seen. However, +using more than two hexadecimal digits produces undefined results. (The +@samp{\x} escape sequence is not allowed in POSIX @code{awk}.) + +@item \/ +A literal slash (necessary for regexp constants only). +You use this when you wish to write a regexp +constant that contains a slash. Since the regexp is delimited by +slashes, you need to escape the slash that is part of the pattern, +in order to tell @code{awk} to keep processing the rest of the regexp. + +@item \" +A literal double-quote (necessary for string constants only). +You use this when you wish to write a string +constant that contains a double-quote. 
Since the string is delimited by +double-quotes, you need to escape the quote that is part of the string, +in order to tell @code{awk} to keep processing the rest of the string. +@end table +@c @end cartouche + +In @code{gawk}, there are additional two character sequences that begin +with backslash that have special meaning in regexps. +@xref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}}. + +In a string constant, +what happens if you place a backslash before something that is not one of +the characters listed above? POSIX @code{awk} purposely leaves this case +undefined. There are two choices. + +@itemize @bullet +@item +Strip the backslash out. This is what Unix @code{awk} and @code{gawk} both do. +For example, @code{"a\qc"} is the same as @code{"aqc"}. + +@item +Leave the backslash alone. Some other @code{awk} implementations do this. +In such implementations, @code{"a\qc"} is the same as if you had typed +@code{"a\\qc"}. +@end itemize + +In a regexp, a backslash before any character that is not in the above table, +and not listed in +@ref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}}, +means that the next character should be taken literally, even if it would +normally be a regexp operator. E.g., @code{/a\+b/} matches the three +characters @samp{a+b}. + +@cindex portability issues +For complete portability, do not use a backslash before any character not +listed in the table above. + +Another interesting question arises. Suppose you use an octal or hexadecimal +escape to represent a regexp metacharacter +(@pxref{Regexp Operators, , Regular Expression Operators}). +Does @code{awk} treat the character as literal character, or as a regexp +operator? + +@cindex dark corner +It turns out that historically, such characters were taken literally (d.c.). +However, the POSIX standard indicates that they should be treated +as real metacharacters, and this is what @code{gawk} does. 
+However, in compatibility mode (@pxref{Options, ,Command Line Options}), +@code{gawk} treats the characters represented by octal and hexadecimal +escape sequences literally when used in regexp constants. Thus, +@code{/a\52b/} is equivalent to @code{/a\*b/}. + +To summarize: + +@enumerate 1 +@item +The escape sequences in the table above are always processed first, +for both string constants and regexp constants. This happens very early, +as soon as @code{awk} reads your program. + +@item +@code{gawk} processes both regexp constants and dynamic regexps +(@pxref{Computed Regexps, ,Using Dynamic Regexps}), +for the special operators listed in +@ref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}}. + +@item +A backslash before any other character means to treat that character +literally. +@end enumerate + +@node Regexp Operators, GNU Regexp Operators, Escape Sequences, Regexp +@section Regular Expression Operators +@cindex metacharacters +@cindex regular expression metacharacters +@cindex regexp operators + +You can combine regular expressions with the following characters, +called @dfn{regular expression operators}, or @dfn{metacharacters}, to +increase the power and versatility of regular expressions. + +The escape sequences described +@iftex +above +@end iftex +in @ref{Escape Sequences}, +are valid inside a regexp. They are introduced by a @samp{\}. They +are recognized and converted into the corresponding real characters as +the very first step in processing regexps. + +Here is a table of metacharacters. All characters that are not escape +sequences and that are not listed in the table stand for themselves. + +@table @code +@item \ +This is used to suppress the special meaning of a character when +matching. For example: + +@example +\$ +@end example + +@noindent +matches the character @samp{$}. + +@c NEEDED +@page +@cindex anchors in regexps +@cindex regexp, anchors +@item ^ +This matches the beginning of a string. 
For example: + +@example +^@@chapter +@end example + +@noindent +matches the @samp{@@chapter} at the beginning of a string, and can be used +to identify chapter beginnings in Texinfo source files. +The @samp{^} is known as an @dfn{anchor}, since it anchors the pattern to +matching only at the beginning of the string. + +It is important to realize that @samp{^} does not match the beginning of +a line embedded in a string. In this example the condition is not true: + +@example +if ("line1\nLINE 2" ~ /^L/) @dots{} +@end example + +@item $ +This is similar to @samp{^}, but it matches only at the end of a string. +For example: + +@example +p$ +@end example + +@noindent +matches a record that ends with a @samp{p}. The @samp{$} is also an anchor, +and also does not match the end of a line embedded in a string. In this +example the condition is not true: + +@example +if ("line1\nLINE 2" ~ /1$/) @dots{} +@end example + +@item . +The period, or dot, matches any single character, +@emph{including} the newline character. For example: + +@example +.P +@end example + +@noindent +matches any single character followed by a @samp{P} in a string. Using +concatenation we can make a regular expression like @samp{U.A}, which +matches any three-character sequence that begins with @samp{U} and ends +with @samp{A}. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +In strict POSIX mode (@pxref{Options, ,Command Line Options}), +@samp{.} does not match the @sc{nul} +character, which is a character with all bits equal to zero. +Otherwise, @sc{nul} is just another character. Other versions of @code{awk} +may not be able to match the @sc{nul} character. + +@ignore +2e: Add stuff that character list is the POSIX terminology. In other + literature known as character set or character class. +@end ignore + +@cindex character list +@item [@dots{}] +This is called a @dfn{character list}. It matches any @emph{one} of the +characters that are enclosed in the square brackets. 
For example: + +@example +[MVX] +@end example + +@noindent +matches any one of the characters @samp{M}, @samp{V}, or @samp{X} in a +string. + +Ranges of characters are indicated by using a hyphen between the beginning +and ending characters, and enclosing the whole thing in brackets. For +example: + +@example +[0-9] +@end example + +@noindent +matches any digit. +Multiple ranges are allowed. E.g., the list @code{@w{[A-Za-z0-9]}} is a +common way to express the idea of ``all alphanumeric characters.'' + +To include one of the characters @samp{\}, @samp{]}, @samp{-} or @samp{^} in a +character list, put a @samp{\} in front of it. For example: + +@example +[d\]] +@end example + +@noindent +matches either @samp{d}, or @samp{]}. + +@cindex @code{egrep} +This treatment of @samp{\} in character lists +is compatible with other @code{awk} +implementations, and is also mandated by POSIX. +The regular expressions in @code{awk} are a superset +of the POSIX specification for Extended Regular Expressions (EREs). +POSIX EREs are based on the regular expressions accepted by the +traditional @code{egrep} utility. + +@cindex character classes +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@dfn{Character classes} are a new feature introduced in the POSIX standard. +A character class is a special notation for describing +lists of characters that have a specific attribute, but where the +actual characters themselves can vary from country to country and/or +from character set to character set. For example, the notion of what +is an alphabetic character differs in the USA and in France. + +A character class is only valid in a regexp @emph{inside} the +brackets of a character list. Character classes consist of @samp{[:}, +a keyword denoting the class, and @samp{:]}. Here are the character +classes defined by the POSIX standard. + +@table @code +@item [:alnum:] +Alphanumeric characters. + +@item [:alpha:] +Alphabetic characters. 
+
+@item [:blank:]
+Space and tab characters.
+
+@item [:cntrl:]
+Control characters.
+
+@item [:digit:]
+Numeric characters.
+
+@item [:graph:]
+Characters that are printable and are also visible.
+(A space is printable, but not visible, while an @samp{a} is both.)
+
+@item [:lower:]
+Lower-case alphabetic characters.
+
+@item [:print:]
+Printable characters (characters that are not control characters.)
+
+@item [:punct:]
+Punctuation characters (characters that are not letters, digits,
+control characters, or space characters).
+
+@item [:space:]
+Space characters (such as space, tab, and formfeed, to name a few).
+
+@item [:upper:]
+Upper-case alphabetic characters.
+
+@item [:xdigit:]
+Characters that are hexadecimal digits.
+@end table
+
+For example, before the POSIX standard, to match alphanumeric
+characters, you had to write @code{/[A-Za-z0-9]/}. If your
+character set had other alphabetic characters in it, this would not
+match them. With the POSIX character classes, you can write
+@code{/[[:alnum:]]/}, and this will match @emph{all} the alphabetic
+and numeric characters in your character set.
+
+@cindex collating elements
+Two additional special sequences can appear in character lists.
+These apply to non-ASCII character sets, which can have single symbols
+(called @dfn{collating elements}) that are represented with more than one
+character, as well as several characters that are equivalent for
+@dfn{collating}, or sorting, purposes. (E.g., in French, a plain ``e''
+and a grave-accented ``@`e'' are equivalent.)
+
+@table @asis
+@cindex collating symbols
+@item Collating Symbols
+A @dfn{collating symbol} is a multi-character collating element enclosed in
+@samp{[.} and @samp{.]}. For example, if @samp{ch} is a collating element,
+then @code{[[.ch.]]} is a regexp that matches this collating element, while
+@code{[ch]} is a regexp that matches either @samp{c} or @samp{h}. 
+
+@cindex equivalence classes
+@item Equivalence Classes
+An @dfn{equivalence class} is a locale-specific name for a list of
+characters that are equivalent. The name is enclosed in
+@samp{[=} and @samp{=]}.
+For example, the name @samp{e} might be used to represent all of
+``e,'' ``@`e,'' and ``@'e.'' In this case, @code{[[=e=]]} is a regexp
+that matches any of @samp{e}, @samp{@'e}, or @samp{@`e}.
+@end table
+
+These features are very valuable in non-English speaking locales.
+
+@strong{Caution:} The library functions that @code{gawk} uses for regular
+expression matching currently only recognize POSIX character classes;
+they do not recognize collating symbols or equivalence classes.
+@c maybe one day ...
+
+@cindex complemented character list
+@cindex character list, complemented
+@item [^ @dots{}]
+This is a @dfn{complemented character list}. The first character after
+the @samp{[} @emph{must} be a @samp{^}. It matches any characters
+@emph{except} those in the square brackets. For example:
+
+@example
+[^0-9]
+@end example
+
+@noindent
+matches any character that is not a digit.
+
+@item |
+This is the @dfn{alternation operator}, and it is used to specify
+alternatives. For example:
+
+@example
+^P|[0-9]
+@end example
+
+@noindent
+matches any string that matches either @samp{^P} or @samp{[0-9]}. This
+means it matches any string that starts with @samp{P} or contains a digit.
+
+The alternation applies to the largest possible regexps on either side.
+In other words, @samp{|} has the lowest precedence of all the regular
+expression operators.
+
+@item (@dots{})
+Parentheses are used for grouping in regular expressions as in
+arithmetic. They can be used to concatenate regular expressions
+containing the alternation operator, @samp{|}. For example,
+@samp{@@(samp|code)\@{[^@}]+\@}} matches both @samp{@@code@{foo@}} and
+@samp{@@samp@{bar@}}. (These are Texinfo formatting control sequences.) 
+ +@item * +This symbol means that the preceding regular expression is to be +repeated as many times as necessary to find a match. For example: + +@example +ph* +@end example + +@noindent +applies the @samp{*} symbol to the preceding @samp{h} and looks for matches +of one @samp{p} followed by any number of @samp{h}s. This will also match +just @samp{p} if no @samp{h}s are present. + +The @samp{*} repeats the @emph{smallest} possible preceding expression. +(Use parentheses if you wish to repeat a larger expression.) It finds +as many repetitions as possible. For example: + +@example +awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample +@end example + +@noindent +prints every record in @file{sample} containing a string of the form +@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on. +Notice the escaping of the parentheses by preceding them +with backslashes. + +@item + +This symbol is similar to @samp{*}, but the preceding expression must be +matched at least once. This means that: + +@example +wh+y +@end example + +@noindent +would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas +@samp{wh*y} would match all three of these strings. This is a simpler +way of writing the last @samp{*} example: + +@example +awk '/\(c[ad]+r x\)/ @{ print @}' sample +@end example + +@item ? +This symbol is similar to @samp{*}, but the preceding expression can be +matched either once or not at all. For example: + +@example +fe?d +@end example + +@noindent +will match @samp{fed} and @samp{fd}, but nothing else. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@cindex interval expressions +@item @{@var{n}@} +@itemx @{@var{n},@} +@itemx @{@var{n},@var{m}@} +One or two numbers inside braces denote an @dfn{interval expression}. +If there is one number in the braces, the preceding regexp is repeated +@var{n} times. +If there are two numbers separated by a comma, the preceding regexp is +repeated @var{n} to @var{m} times. 
+If there is one number followed by a comma, then the preceding regexp +is repeated at least @var{n} times. + +@table @code +@item wh@{3@}y +matches @samp{whhhy} but not @samp{why} or @samp{whhhhy}. + +@item wh@{3,5@}y +matches @samp{whhhy} or @samp{whhhhy} or @samp{whhhhhy}, only. + +@item wh@{2,@}y +matches @samp{whhy} or @samp{whhhy}, and so on. +@end table + +Interval expressions were not traditionally available in @code{awk}. +As part of the POSIX standard they were added, to make @code{awk} +and @code{egrep} consistent with each other. + +However, since old programs may use @samp{@{} and @samp{@}} in regexp +constants, by default @code{gawk} does @emph{not} match interval expressions +in regexps. If either @samp{--posix} or @samp{--re-interval} are specified +(@pxref{Options, , Command Line Options}), then interval expressions +are allowed in regexps. +@end table + +@cindex precedence, regexp operators +@cindex regexp operators, precedence of +In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators, +as well as the braces @samp{@{} and @samp{@}}, +have +the highest precedence, followed by concatenation, and finally by @samp{|}. +As in arithmetic, parentheses can change how operators are grouped. + +If @code{gawk} is in compatibility mode +(@pxref{Options, ,Command Line Options}), +character classes and interval expressions are not available in +regular expressions. + +The next +@ifinfo +node +@end ifinfo +@iftex +section +@end iftex +discusses the GNU-specific regexp operators, and provides +more detail concerning how command line options affect the way @code{gawk} +interprets the characters in regular expressions. + +@node GNU Regexp Operators, Case-sensitivity, Regexp Operators, Regexp +@section Additional Regexp Operators Only in @code{gawk} + +@c This section adapted from the regex-0.12 manual + +@cindex regexp operators, GNU specific +GNU software that deals with regular expressions provides a number of +additional regexp operators. 
These operators are described in this
+section, and are specific to @code{gawk}; they are not available in other
+@code{awk} implementations.
+
+@cindex word, regexp definition of
+Most of the additional operators are for dealing with word matching.
+For our purposes, a @dfn{word} is a sequence of one or more letters, digits,
+or underscores (@samp{_}).
+
+@table @code
+@cindex @code{\w} regexp operator
+@item \w
+This operator matches any word-constituent character, i.e.@: any
+letter, digit, or underscore. Think of it as a short-hand for
+@c @w{@code{[A-Za-z0-9_]}} or
+@w{@code{[[:alnum:]_]}}.
+
+@cindex @code{\W} regexp operator
+@item \W
+This operator matches any character that is not word-constituent.
+Think of it as a short-hand for
+@c @w{@code{[^A-Za-z0-9_]}} or
+@w{@code{[^[:alnum:]_]}}.
+
+@cindex @code{\<} regexp operator
+@item \<
+This operator matches the empty string at the beginning of a word.
+For example, @code{/\<away/} matches @samp{away}, but not
+@samp{stowaway}.
+
+@cindex @code{\>} regexp operator
+@item \>
+This operator matches the empty string at the end of a word.
+For example, @code{/stow\>/} matches @samp{stow}, but not @samp{stowaway}.
+
+@cindex @code{\y} regexp operator
+@cindex word boundaries, matching
+@item \y
+This operator matches the empty string at either the beginning or the
+end of a word (the word boundar@strong{y}). For example, @samp{\yballs?\y}
+matches either @samp{ball} or @samp{balls} as a separate word.
+
+@cindex @code{\B} regexp operator
+@item \B
+This operator matches the empty string within a word. In other words,
+@samp{\B} matches the empty string that occurs between two
+word-constituent characters. For example,
+@code{/\Brat\B/} matches @samp{crate}, but it does not match @samp{dirty rat}.
+@samp{\B} is essentially the opposite of @samp{\y}.
+@end table
+
+There are two other operators that work on buffers. In Emacs, a
+@dfn{buffer} is, naturally, an Emacs buffer. 
For other programs, the
+regexp library routines that @code{gawk} uses consider the entire
+string to be matched as the buffer.
+
+For @code{awk}, since @samp{^} and @samp{$} always work in terms
+of the beginning and end of strings, these operators don't add any
+new capabilities. They are provided for compatibility with other GNU
+software.
+
+@cindex buffer matching operators
+@table @code
+@cindex @code{\`} regexp operator
+@item \`
+This operator matches the empty string at the
+beginning of the buffer.
+
+@cindex @code{\'} regexp operator
+@item \'
+This operator matches the empty string at the
+end of the buffer.
+@end table
+
+In other GNU software, the word boundary operator is @samp{\b}. However,
+that conflicts with the @code{awk} language's definition of @samp{\b}
+as backspace, so @code{gawk} uses a different letter.
+
+An alternative method would have been to require two backslashes in the
+GNU operators, but this was deemed to be too confusing, and the current
+method of using @samp{\y} for the GNU @samp{\b} appears to be the
+lesser of two evils.
+
+@c NOTE!!! Keep this in sync with the same table in the summary appendix!
+@cindex regexp, effect of command line options
+The various command line options
+(@pxref{Options, ,Command Line Options})
+control how @code{gawk} interprets characters in regexps.
+
+@table @asis
+@item No options
+In the default case, @code{gawk} provides all the facilities of
+POSIX regexps and the GNU regexp operators described
+@iftex
+above.
+@end iftex
+@ifinfo
+in @ref{Regexp Operators, ,Regular Expression Operators}.
+@end ifinfo
+However, interval expressions are not supported.
+
+@item @code{--posix}
+Only POSIX regexps are supported, the GNU operators are not special
+(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions
+are allowed.
+
+@item @code{--traditional}
+Traditional Unix @code{awk} regexps are matched. 
The GNU operators +are not special, interval expressions are not available, and neither +are the POSIX character classes (@code{[[:alnum:]]} and so on). +Characters described by octal and hexadecimal escape sequences are +treated literally, even if they represent regexp metacharacters. + +@item @code{--re-interval} +Allow interval expressions in regexps, even if @samp{--traditional} +has been provided. +@end table + +@node Case-sensitivity, Leftmost Longest, GNU Regexp Operators, Regexp +@section Case-sensitivity in Matching + +@cindex case sensitivity +@cindex ignoring case +Case is normally significant in regular expressions, both when matching +ordinary characters (i.e.@: not metacharacters), and inside character +sets. Thus a @samp{w} in a regular expression matches only a lower-case +@samp{w} and not an upper-case @samp{W}. + +The simplest way to do a case-independent match is to use a character +list: @samp{[Ww]}. However, this can be cumbersome if you need to use it +often; and it can make the regular expressions harder to +read. There are two alternatives that you might prefer. + +One way to do a case-insensitive match at a particular point in the +program is to convert the data to a single case, using the +@code{tolower} or @code{toupper} built-in string functions (which we +haven't discussed yet; +@pxref{String Functions, ,Built-in Functions for String Manipulation}). +For example: + +@example +tolower($1) ~ /foo/ @{ @dots{} @} +@end example + +@noindent +converts the first field to lower-case before matching against it. +This will work in any POSIX-compliant implementation of @code{awk}. + +@cindex differences between @code{gawk} and @code{awk} +@cindex @code{~} operator +@cindex @code{!~} operator +@vindex IGNORECASE +Another method, specific to @code{gawk}, is to set the variable +@code{IGNORECASE} to a non-zero value (@pxref{Built-in Variables}). +When @code{IGNORECASE} is not zero, @emph{all} regexp and string +operations ignore case. 
Changing the value of +@code{IGNORECASE} dynamically controls the case sensitivity of your +program as it runs. Case is significant by default because +@code{IGNORECASE} (like most variables) is initialized to zero. + +@example +@group +x = "aB" +if (x ~ /ab/) @dots{} # this test will fail +@end group + +@group +IGNORECASE = 1 +if (x ~ /ab/) @dots{} # now it will succeed +@end group +@end example + +In general, you cannot use @code{IGNORECASE} to make certain rules +case-insensitive and other rules case-sensitive, because there is no way +to set @code{IGNORECASE} just for the pattern of a particular rule. +@ignore +This isn't quite true. Consider: + + IGNORECASE=1 && /foObAr/ { .... } + IGNORECASE=0 || /foobar/ { .... } + +But that's pretty bad style and I don't want to get into it at this +late date. +@end ignore +To do this, you must use character lists or @code{tolower}. However, one +thing you can do only with @code{IGNORECASE} is turn case-sensitivity on +or off dynamically for all the rules at once. + +@code{IGNORECASE} can be set on the command line, or in a @code{BEGIN} rule +(@pxref{Other Arguments, ,Other Command Line Arguments}; also +@pxref{Using BEGIN/END, ,Startup and Cleanup Actions}). +Setting @code{IGNORECASE} from the command line is a way to make +a program case-insensitive without having to edit it. + +Prior to version 3.0 of @code{gawk}, the value of @code{IGNORECASE} +only affected regexp operations. It did not affect string comparison +with @samp{==}, @samp{!=}, and so on. +Beginning with version 3.0, both regexp and string comparison +operations are affected by @code{IGNORECASE}. + +@cindex ISO 8859-1 +@cindex ISO Latin-1 +Beginning with version 3.0 of @code{gawk}, the equivalences between upper-case +and lower-case characters are based on the ISO-8859-1 (ISO Latin-1) +character set. This character set is a superset of the traditional 128 +ASCII characters, that also provides a number of characters suitable +for use with European languages. 
+@ignore +A pure ASCII character set can be used instead if @code{gawk} is compiled +with @samp{-DUSE_PURE_ASCII}. +@end ignore + +The value of @code{IGNORECASE} has no effect if @code{gawk} is in +compatibility mode (@pxref{Options, ,Command Line Options}). +Case is always significant in compatibility mode. + +@node Leftmost Longest, Computed Regexps, Case-sensitivity, Regexp +@section How Much Text Matches? + +@cindex leftmost longest match +@cindex matching, leftmost longest +Consider the following example: + +@example +echo aaaabcd | awk '@{ sub(/a+/, ""); print @}' +@end example + +This example uses the @code{sub} function (which we haven't discussed yet, +@pxref{String Functions, ,Built-in Functions for String Manipulation}) +to make a change to the input record. Here, the regexp @code{/a+/} +indicates ``one or more @samp{a} characters,'' and the replacement +text is @samp{}. + +The input contains four @samp{a} characters. What will the output be? +In other words, how many is ``one or more''---will @code{awk} match two, +three, or all four @samp{a} characters? + +The answer is, @code{awk} (and POSIX) regular expressions always match +the leftmost, @emph{longest} sequence of input characters that can +match. Thus, in this example, all four @samp{a} characters are +replaced with @samp{}. + +@example +$ echo aaaabcd | awk '@{ sub(/a+/, ""); print @}' +@print{} bcd +@end example + +For simple match/no-match tests, this is not so important. But when doing +regexp-based field and record splitting, and +text matching and substitutions with the @code{match}, @code{sub}, @code{gsub}, +and @code{gensub} functions, it is very important. +@ifinfo +@xref{String Functions, ,Built-in Functions for String Manipulation}, +for more information on these functions. 
+@end ifinfo +Understanding this principle is also important for regexp-based record +and field splitting (@pxref{Records, ,How Input is Split into Records}, +and also @pxref{Field Separators, ,Specifying How Fields are Separated}). + +@node Computed Regexps, , Leftmost Longest, Regexp +@section Using Dynamic Regexps + +@cindex computed regular expressions +@cindex regular expressions, computed +@cindex dynamic regular expressions +@cindex regexp, dynamic +@cindex @code{~} operator +@cindex @code{!~} operator +The right hand side of a @samp{~} or @samp{!~} operator need not be a +regexp constant (i.e.@: a string of characters between slashes). It may +be any expression. The expression is evaluated, and converted if +necessary to a string; the contents of the string are used as the +regexp. A regexp that is computed in this way is called a @dfn{dynamic +regexp}. For example: + +@example +BEGIN @{ identifier_regexp = "[A-Za-z_][A-Za-z_0-9]+" @} +$0 ~ identifier_regexp @{ print @} +@end example + +@noindent +sets @code{identifier_regexp} to a regexp that describes @code{awk} +variable names, and tests if the input record matches this regexp. + +@strong{Caution:} When using the @samp{~} and @samp{!~} +operators, there is a difference between a regexp constant +enclosed in slashes, and a string constant enclosed in double quotes. +If you are going to use a string constant, you have to understand that +the string is in essence scanned @emph{twice}; the first time when +@code{awk} reads your program, and the second time when it goes to +match the string on the left-hand side of the operator with the pattern +on the right. This is true of any string valued expression (such as +@code{identifier_regexp} above), not just string constants. + +@cindex regexp constants, difference between slashes and quotes +What difference does it make if the string is +scanned twice? The answer has to do with escape sequences, and particularly +with backslashes. 
To get a backslash into a regular expression inside a +string, you have to type two backslashes. + +For example, @code{/\*/} is a regexp constant for a literal @samp{*}. +Only one backslash is needed. To do the same thing with a string, +you would have to type @code{"\\*"}. The first backslash escapes the +second one, so that the string actually contains the +two characters @samp{\} and @samp{*}. + +@cindex common mistakes +@cindex mistakes, common +@cindex errors, common +Given that you can use both regexp and string constants to describe +regular expressions, which should you use? The answer is ``regexp +constants,'' for several reasons. + +@enumerate 1 +@item +String constants are more complicated to write, and +more difficult to read. Using regexp constants makes your programs +less error-prone. Not understanding the difference between the two +kinds of constants is a common source of errors. + +@item +It is also more efficient to use regexp constants: @code{awk} can note +that you have supplied a regexp and store it internally in a form that +makes pattern matching more efficient. When using a string constant, +@code{awk} must first convert the string into this internal form, and +then perform the pattern matching. + +@item +Using regexp constants is better style; it shows clearly that you +intend a regexp match. +@end enumerate + +@node Reading Files, Printing, Regexp, Top +@chapter Reading Input Files + +@cindex reading files +@cindex input +@cindex standard input +@vindex FILENAME +In the typical @code{awk} program, all input is read either from the +standard input (by default the keyboard, but often a pipe from another +command) or from files whose names you specify on the @code{awk} command +line. If you specify input files, @code{awk} reads them in order, reading +all the data from one before going on to the next. The name of the current +input file can be found in the built-in variable @code{FILENAME} +(@pxref{Built-in Variables}). 
+ +The input is read in units called @dfn{records}, and processed by the +rules of your program one record at a time. +By default, each record is one line. Each +record is automatically split into chunks called @dfn{fields}. +This makes it more convenient for programs to work on the parts of a record. + +On rare occasions you will need to use the @code{getline} command. +The @code{getline} command is valuable, both because it +can do explicit input from any number of files, and because the files +used with it do not have to be named on the @code{awk} command line +(@pxref{Getline, ,Explicit Input with @code{getline}}). + +@menu +* Records:: Controlling how data is split into records. +* Fields:: An introduction to fields. +* Non-Constant Fields:: Non-constant Field Numbers. +* Changing Fields:: Changing the Contents of a Field. +* Field Separators:: The field separator and how to change it. +* Constant Size:: Reading constant width data. +* Multiple Line:: Reading multi-line records. +* Getline:: Reading files under explicit program control + using the @code{getline} function. +@end menu + +@node Records, Fields, Reading Files, Reading Files +@section How Input is Split into Records + +@cindex record separator, @code{RS} +@cindex changing the record separator +@cindex record, definition of +@vindex RS +The @code{awk} utility divides the input for your @code{awk} +program into records and fields. +Records are separated by a character called the @dfn{record separator}. +By default, the record separator is the newline character. +This is why records are, by default, single lines. +You can use a different character for the record separator by +assigning the character to the built-in variable @code{RS}. + +You can change the value of @code{RS} in the @code{awk} program, +like any other variable, with the +assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}). 
+The new record-separator character should be enclosed in quotation marks, +which indicate +a string constant. Often the right time to do this is at the beginning +of execution, before any input has been processed, so that the very +first record will be read with the proper separator. To do this, use +the special @code{BEGIN} pattern +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). For +example: + +@example +awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list +@end example + +@noindent +changes the value of @code{RS} to @code{"/"}, before reading any input. +This is a string whose first character is a slash; as a result, records +are separated by slashes. Then the input file is read, and the second +rule in the @code{awk} program (the action with no pattern) prints each +record. Since each @code{print} statement adds a newline at the end of +its output, the effect of this @code{awk} program is to copy the input +with each slash changed to a newline. Here are the results of running +the program on @file{BBS-list}: + +@example +@group +$ awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list +@print{} aardvark 555-5553 1200 +@print{} 300 B +@print{} alpo-net 555-3412 2400 +@print{} 1200 +@print{} 300 A +@print{} barfly 555-7685 1200 +@print{} 300 A +@print{} bites 555-1675 2400 +@print{} 1200 +@print{} 300 A +@print{} camelot 555-0542 300 C +@print{} core 555-2912 1200 +@print{} 300 C +@print{} fooey 555-1234 2400 +@print{} 1200 +@print{} 300 B +@print{} foot 555-6699 1200 +@print{} 300 B +@print{} macfoo 555-6480 1200 +@print{} 300 A +@print{} sdace 555-3430 2400 +@print{} 1200 +@print{} 300 A +@print{} sabafoo 555-2127 1200 +@print{} 300 C +@print{} +@end group +@end example + +@noindent +Note that the entry for the @samp{camelot} BBS is not split. 
+In the original data file +(@pxref{Sample Data Files, , Data Files for the Examples}), +the line looks like this: + +@example +camelot 555-0542 300 C +@end example + +@noindent +It only has one baud rate; there are no slashes in the record. + +Another way to change the record separator is on the command line, +using the variable-assignment feature +(@pxref{Other Arguments, ,Other Command Line Arguments}). + +@example +awk '@{ print $0 @}' RS="/" BBS-list +@end example + +@noindent +This sets @code{RS} to @samp{/} before processing @file{BBS-list}. + +Using an unusual character such as @samp{/} for the record separator +produces correct behavior in the vast majority of cases. However, +the following (extreme) pipeline prints a surprising @samp{1}. There +is one field, consisting of a newline. The value of the built-in +variable @code{NF} is the number of fields in the current record. + +@example +$ echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}' +@print{} 1 +@end example + +@cindex dark corner +@noindent +Reaching the end of an input file terminates the current input record, +even if the last character in the file is not the character in @code{RS} +(d.c.). + +@cindex empty string +The empty string, @code{""} (a string of no characters), has a special meaning +as the value of @code{RS}: it means that records are separated +by one or more blank lines, and nothing else. +@xref{Multiple Line, ,Multiple-Line Records}, for more details. + +If you change the value of @code{RS} in the middle of an @code{awk} run, +the new value is used to delimit subsequent records, but the record +currently being processed (and records already processed) are not +affected. + +@vindex RT +@cindex record terminator, @code{RT} +@cindex terminator, record +@cindex differences between @code{gawk} and @code{awk} +After the end of the record has been determined, @code{gawk} +sets the variable @code{RT} to the text in the input that matched +@code{RS}. 
+ +@cindex regular expressions as record separators +The value of @code{RS} is in fact not limited to a one-character +string. It can be any regular expression +(@pxref{Regexp, ,Regular Expressions}). +In general, each record +ends at the next string that matches the regular expression; the next +record starts at the end of the matching string. This general rule is +actually at work in the usual case, where @code{RS} contains just a +newline: a record ends at the beginning of the next matching string (the +next newline in the input) and the following record starts just after +the end of this string (at the first character of the following line). +The newline, since it matches @code{RS}, is not part of either record. + +When @code{RS} is a single character, @code{RT} will +contain the same single character. However, when @code{RS} is a +regular expression, then @code{RT} becomes more useful; it contains +the actual input text that matched the regular expression. + +The following example illustrates both of these features. +It sets @code{RS} equal to a regular expression that +matches either a newline, or a series of one or more upper-case letters +with optional leading and/or trailing white space +(@pxref{Regexp, , Regular Expressions}). + +@example +$ echo record 1 AAAA record 2 BBBB record 3 | +> gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @} +> @{ print "Record =", $0, "and RT =", RT @}' +@print{} Record = record 1 and RT = AAAA +@print{} Record = record 2 and RT = BBBB +@print{} Record = record 3 and RT = +@print{} +@end example + +@noindent +The final line of output has an extra blank line. This is because the +value of @code{RT} is a newline, and then the @code{print} statement +supplies its own terminating newline. + +@xref{Simple Sed, ,A Simple Stream Editor}, for a more useful example +of @code{RS} as a regexp and @code{RT}. 
+ +@cindex differences between @code{gawk} and @code{awk} +The use of @code{RS} as a regular expression and the @code{RT} +variable are @code{gawk} extensions; they are not available in +compatibility mode +(@pxref{Options, ,Command Line Options}). +In compatibility mode, only the first character of the value of +@code{RS} is used to determine the end of the record. + +@cindex number of records, @code{NR}, @code{FNR} +@vindex NR +@vindex FNR +The @code{awk} utility keeps track of the number of records that have +been read so far from the current input file. This value is stored in a +built-in variable called @code{FNR}. It is reset to zero when a new +file is started. Another built-in variable, @code{NR}, is the total +number of input records read so far from all data files. It starts at zero +but is never automatically reset to zero. + +@node Fields, Non-Constant Fields, Records, Reading Files +@section Examining Fields + +@cindex examining fields +@cindex fields +@cindex accessing fields +When @code{awk} reads an input record, the record is +automatically separated or @dfn{parsed} by the interpreter into chunks +called @dfn{fields}. By default, fields are separated by whitespace, +like words in a line. +Whitespace in @code{awk} means any string of one or more spaces, +tabs or newlines;@footnote{In POSIX @code{awk}, newlines are not +considered whitespace for separating fields.} other characters such as +formfeed, and so on, that are +considered whitespace by other languages are @emph{not} considered +whitespace by @code{awk}. + +The purpose of fields is to make it more convenient for you to refer to +these pieces of the record. You don't have to use them---you can +operate on the whole record if you wish---but fields are what make +simple @code{awk} programs so powerful. 
+ +@cindex @code{$} (field operator) +@cindex field operator @code{$} +To refer to a field in an @code{awk} program, you use a dollar-sign, +@samp{$}, followed by the number of the field you want. Thus, @code{$1} +refers to the first field, @code{$2} to the second, and so on. For +example, suppose the following is a line of input: + +@example +This seems like a pretty nice example. +@end example + +@noindent +Here the first field, or @code{$1}, is @samp{This}; the second field, or +@code{$2}, is @samp{seems}; and so on. Note that the last field, +@code{$7}, is @samp{example.}. Because there is no space between the +@samp{e} and the @samp{.}, the period is considered part of the seventh +field. + +@vindex NF +@cindex number of fields, @code{NF} +@code{NF} is a built-in variable whose value +is the number of fields in the current record. +@code{awk} updates the value of @code{NF} automatically, each time +a record is read. + +No matter how many fields there are, the last field in a record can be +represented by @code{$NF}. So, in the example above, @code{$NF} would +be the same as @code{$7}, which is @samp{example.}. Why this works is +explained below (@pxref{Non-Constant Fields, ,Non-constant Field Numbers}). +If you try to reference a field beyond the last one, such as @code{$8} +when the record has only seven fields, you get the empty string. +@c the empty string acts like 0 in some contexts, but I don't want to +@c get into that here.... + +@code{$0}, which looks like a reference to the ``zeroth'' field, is +a special case: it represents the whole input record. @code{$0} is +used when you are not interested in fields. 
+ +Here are some more examples: + +@example +@group +$ awk '$1 ~ /foo/ @{ print $0 @}' BBS-list +@print{} fooey 555-1234 2400/1200/300 B +@print{} foot 555-6699 1200/300 B +@print{} macfoo 555-6480 1200/300 A +@print{} sabafoo 555-2127 1200/300 C +@end group +@end example + +@noindent +This example prints each record in the file @file{BBS-list} whose first +field contains the string @samp{foo}. The operator @samp{~} is called a +@dfn{matching operator} +(@pxref{Regexp Usage, , How to Use Regular Expressions}); +it tests whether a string (here, the field @code{$1}) matches a given regular +expression. + +By contrast, the following example +looks for @samp{foo} in @emph{the entire record} and prints the first +field and the last field for each input record containing a +match. + +@example +@group +$ awk '/foo/ @{ print $1, $NF @}' BBS-list +@print{} fooey B +@print{} foot B +@print{} macfoo A +@print{} sabafoo C +@end group +@end example + +@node Non-Constant Fields, Changing Fields, Fields, Reading Files +@section Non-constant Field Numbers + +The number of a field does not need to be a constant. Any expression in +the @code{awk} language can be used after a @samp{$} to refer to a +field. The value of the expression specifies the field number. If the +value is a string, rather than a number, it is converted to a number. +Consider this example: + +@example +awk '@{ print $NR @}' +@end example + +@noindent +Recall that @code{NR} is the number of records read so far: one in the +first record, two in the second, etc. So this example prints the first +field of the first record, the second field of the second record, and so +on. For the twentieth record, field number 20 is printed; most likely, +the record has fewer than 20 fields, so this prints a blank line. 
+ +Here is another example of using expressions as field numbers: + +@example +awk '@{ print $(2*2) @}' BBS-list +@end example + +@code{awk} must evaluate the expression @samp{(2*2)} and use +its value as the number of the field to print. The @samp{*} sign +represents multiplication, so the expression @samp{2*2} evaluates to four. +The parentheses are used so that the multiplication is done before the +@samp{$} operation; they are necessary whenever there is a binary +operator in the field-number expression. This example, then, prints the +hours of operation (the fourth field) for every line of the file +@file{BBS-list}. (All of the @code{awk} operators are listed, in +order of decreasing precedence, in +@ref{Precedence, , Operator Precedence (How Operators Nest)}.) + +If the field number you compute is zero, you get the entire record. +Thus, @code{$(2-2)} has the same value as @code{$0}. Negative field +numbers are not allowed; trying to reference one will usually terminate +your running @code{awk} program. (The POSIX standard does not define +what happens when you reference a negative field number. @code{gawk} +will notice this and terminate your program. Other @code{awk} +implementations may behave differently.) + +As mentioned in @ref{Fields, ,Examining Fields}, +the number of fields in the current record is stored in the built-in +variable @code{NF} (also @pxref{Built-in Variables}). The expression +@code{$NF} is not a special feature: it is the direct consequence of +evaluating @code{NF} and using its value as a field number. + +@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files +@section Changing the Contents of a Field + +@cindex field, changing contents of +@cindex changing contents of a field +@cindex assignment to fields +You can change the contents of a field as seen by @code{awk} within an +@code{awk} program; this changes what @code{awk} perceives as the +current input record. 
(The actual input is untouched; @code{awk} @emph{never} +modifies the input file.) + +Consider this example and its output: + +@example +@group +$ awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped +@print{} 13 3 +@print{} 15 5 +@print{} 15 5 +@dots{} +@end group +@end example + +@noindent +The @samp{-} sign represents subtraction, so this program reassigns +field three, @code{$3}, to be the value of field two minus ten, +@samp{$2 - 10}. (@xref{Arithmetic Ops, ,Arithmetic Operators}.) +Then field two, and the new value for field three, are printed. + +In order for this to work, the text in field @code{$2} must make sense +as a number; the string of characters must be converted to a number in +order for the computer to do arithmetic on it. The number resulting +from the subtraction is converted back to a string of characters which +then becomes field three. +@xref{Conversion, ,Conversion of Strings and Numbers}. + +When you change the value of a field (as perceived by @code{awk}), the +text of the input record is recalculated to contain the new field where +the old one was. Therefore, @code{$0} changes to reflect the altered +field. Thus, this program +prints a copy of the input file, with 10 subtracted from the second +field of each line. + +@example +@group +$ awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped +@print{} Jan 3 25 15 115 +@print{} Feb 5 32 24 226 +@print{} Mar 5 24 34 228 +@dots{} +@end group +@end example + +You can also assign contents to fields that are out of range. For +example: + +@example +$ awk '@{ $6 = ($5 + $4 + $3 + $2) +> print $6 @}' inventory-shipped +@print{} 168 +@print{} 297 +@print{} 301 +@dots{} +@end example + +@noindent +We've just created @code{$6}, whose value is the sum of fields +@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign +represents addition. For the file @file{inventory-shipped}, @code{$6} +represents the total number of parcels shipped for a particular month. 
+ +Creating a new field changes @code{awk}'s internal copy of the current +input record---the value of @code{$0}. Thus, if you do @samp{print $0} +after adding a field, the record printed includes the new field, with +the appropriate number of field separators between it and the previously +existing fields. + +This recomputation affects and is affected by +@code{NF} (the number of fields; @pxref{Fields, ,Examining Fields}), +and by a feature that has not been discussed yet, +the @dfn{output field separator}, @code{OFS}, +which is used to separate the fields (@pxref{Output Separators}). +For example, the value of @code{NF} is set to the number of the highest +field you create. + +Note, however, that merely @emph{referencing} an out-of-range field +does @emph{not} change the value of either @code{$0} or @code{NF}. +Referencing an out-of-range field only produces an empty string. For +example: + +@example +if ($(NF+1) != "") + print "can't happen" +else + print "everything is normal" +@end example + +@noindent +should print @samp{everything is normal}, because @code{NF+1} is certain +to be out of range. (@xref{If Statement, ,The @code{if}-@code{else} Statement}, +for more information about @code{awk}'s @code{if-else} statements. +@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions}, +for more information about the @samp{!=} operator.) + +It is important to note that making an assignment to an existing field +will change the +value of @code{$0}, but will not change the value of @code{NF}, +even when you assign the empty string to a field. For example: + +@example +@group +$ echo a b c d | awk '@{ OFS = ":"; $2 = "" +> print $0; print NF @}' +@print{} a::c:d +@print{} 4 +@end group +@end example + +@noindent +The field is still there; it just has an empty value. You can tell +because there are two colons in a row. + +This example shows what happens if you create a new field. 
+
+@example
+$ echo a b c d | awk '@{ OFS = ":"; $2 = ""; $6 = "new"
+> print $0; print NF @}'
+@print{} a::c:d::new
+@print{} 6
+@end example
+
+@noindent
+The intervening field, @code{$5} is created with an empty value
+(indicated by the second pair of adjacent colons),
+and @code{NF} is updated with the value six.
+
+Finally, decrementing @code{NF} will lose the values of the fields
+after the new value of @code{NF}, and @code{$0} will be recomputed.
+Here is an example:
+
+@example
+$ echo a b c d e f | gawk '@{ print "NF =", NF;
+> NF = 3; print $0 @}'
+@print{} NF = 6
+@print{} a b c
+@end example
+
+@node Field Separators, Constant Size, Changing Fields, Reading Files
+@section Specifying How Fields are Separated
+
+This section is rather long; it describes one of the most fundamental
+operations in @code{awk}.
+
+@menu
+* Basic Field Splitting:: How fields are split with single characters
+ or simple strings.
+* Regexp Field Splitting:: Using regexps as the field separator.
+* Single Character Fields:: Making each character a separate field.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
+* Field Splitting Summary:: Some final points and a summary table.
+@end menu
+
+@node Basic Field Splitting, Regexp Field Splitting, Field Separators, Field Separators
+@subsection The Basics of Field Separating
+@vindex FS
+@cindex fields, separating
+@cindex field separator, @code{FS}
+
+The @dfn{field separator}, which is either a single character or a regular
+expression, controls the way @code{awk} splits an input record into fields.
+@code{awk} scans the input record for character sequences that
+match the separator; the fields themselves are the text between the matches.
+
+In the examples below, we use the bullet symbol ``@bullet{}'' to represent
+spaces in the output. 
+ +If the field separator is @samp{oo}, then the following line: + +@example +moo goo gai pan +@end example + +@noindent +would be split into three fields: @samp{m}, @samp{@bullet{}g} and +@samp{@bullet{}gai@bullet{}pan}. +Note the leading spaces in the values of the second and third fields. + +@cindex common mistakes +@cindex mistakes, common +@cindex errors, common +The field separator is represented by the built-in variable @code{FS}. +Shell programmers take note! @code{awk} does @emph{not} use the name @code{IFS} +which is used by the POSIX compatible shells (such as the Bourne shell, +@code{sh}, or the GNU Bourne-Again Shell, Bash). + +You can change the value of @code{FS} in the @code{awk} program with the +assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}). +Often the right time to do this is at the beginning of execution, +before any input has been processed, so that the very first record +will be read with the proper separator. To do this, use the special +@code{BEGIN} pattern +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). +For example, here we set the value of @code{FS} to the string +@code{","}: + +@example +awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}' +@end example + +@noindent +Given the input line, + +@example +John Q. Smith, 29 Oak St., Walamazoo, MI 42139 +@end example + +@noindent +this @code{awk} program extracts and prints the string +@samp{@bullet{}29@bullet{}Oak@bullet{}St.}. + +@cindex field separator, choice of +@cindex regular expressions as field separators +Sometimes your input data will contain separator characters that don't +separate fields the way you thought they would. For instance, the +person's name in the example we just used might have a title or +suffix attached, such as @samp{John Q. Smith, LXIX}. From input +containing such a name: + +@example +John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139 +@end example + +@noindent +@c careful of an overfull hbox here! 
+the above program would extract @samp{@bullet{}LXIX}, instead of +@samp{@bullet{}29@bullet{}Oak@bullet{}St.}. +If you were expecting the program to print the +address, you would be surprised. The moral is: choose your data layout and +separator characters carefully to prevent such problems. + +@iftex +As you know, normally, +@end iftex +@ifinfo +Normally, +@end ifinfo +fields are separated by whitespace sequences +(spaces, tabs and newlines), not by single spaces: two spaces in a row do not +delimit an empty field. The default value of the field separator @code{FS} +is a string containing a single space, @w{@code{" "}}. If this value were +interpreted in the usual way, each space character would separate +fields, so two spaces in a row would make an empty field between them. +The reason this does not happen is that a single space as the value of +@code{FS} is a special case: it is taken to specify the default manner +of delimiting fields. + +If @code{FS} is any other single character, such as @code{","}, then +each occurrence of that character separates two fields. Two consecutive +occurrences delimit an empty field. If the character occurs at the +beginning or the end of the line, that too delimits an empty field. The +space character is the only single character which does not follow these +rules. + +@node Regexp Field Splitting, Single Character Fields, Basic Field Splitting, Field Separators +@subsection Using Regular Expressions to Separate Fields + +The previous +@iftex +subsection +@end iftex +@ifinfo +node +@end ifinfo +discussed the use of single characters or simple strings as the +value of @code{FS}. +More generally, the value of @code{FS} may be a string containing any +regular expression. In this case, each match in the record for the regular +expression separates fields. 
For example, the assignment: + +@example +FS = ", \t" +@end example + +@noindent +makes every area of an input line that consists of a comma followed by a +space and a tab, into a field separator. (@samp{\t} +is an @dfn{escape sequence} that stands for a tab; +@pxref{Escape Sequences}, +for the complete list of similar escape sequences.) + +For a less trivial example of a regular expression, suppose you want +single spaces to separate fields the way single commas were used above. +You can set @code{FS} to @w{@code{"[@ ]"}} (left bracket, space, right +bracket). This regular expression matches a single space and nothing else +(@pxref{Regexp, ,Regular Expressions}). + +There is an important difference between the two cases of @samp{FS = @w{" "}} +(a single space) and @samp{FS = @w{"[ \t\n]+"}} (left bracket, space, +backslash, ``t'', backslash, ``n'', right bracket, which is a regular +expression matching one or more spaces, tabs, or newlines). For both +values of @code{FS}, fields are separated by runs of spaces, tabs +and/or newlines. However, when the value of @code{FS} is @w{@code{" +"}}, @code{awk} will first strip leading and trailing whitespace from +the record, and then decide where the fields are. + +For example, the following pipeline prints @samp{b}: + +@example +$ echo ' a b c d ' | awk '@{ print $2 @}' +@print{} b +@end example + +@noindent +However, this pipeline prints @samp{a} (note the extra spaces around +each letter): + +@example +$ echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t]+" @} +> @{ print $2 @}' +@print{} a +@end example + +@noindent +@cindex null string +@cindex empty string +In this case, the first field is @dfn{null}, or empty. + +The stripping of leading and trailing whitespace also comes into +play whenever @code{$0} is recomputed. 
For instance, study this pipeline: + +@example +$ echo ' a b c d' | awk '@{ print; $2 = $2; print @}' +@print{} a b c d +@print{} a b c d +@end example + +@noindent +The first @code{print} statement prints the record as it was read, +with leading whitespace intact. The assignment to @code{$2} rebuilds +@code{$0} by concatenating @code{$1} through @code{$NF} together, +separated by the value of @code{OFS}. Since the leading whitespace +was ignored when finding @code{$1}, it is not part of the new @code{$0}. +Finally, the last @code{print} statement prints the new @code{$0}. + +@node Single Character Fields, Command Line Field Separator, Regexp Field Splitting, Field Separators +@subsection Making Each Character a Separate Field + +@cindex differences between @code{gawk} and @code{awk} +@cindex single character fields +There are times when you may want to examine each character +of a record separately. In @code{gawk}, this is easy to do, you +simply assign the null string (@code{""}) to @code{FS}. In this case, +each individual character in the record will become a separate field. +Here is an example: + +@example +@group +$ echo a b | gawk 'BEGIN @{ FS = "" @} +> @{ +> for (i = 1; i <= NF; i = i + 1) +> print "Field", i, "is", $i +> @}' +@print{} Field 1 is a +@print{} Field 2 is +@print{} Field 3 is b +@end group +@end example + +@cindex dark corner +Traditionally, the behavior for @code{FS} equal to @code{""} was not defined. +In this case, Unix @code{awk} would simply treat the entire record +as only having one field (d.c.). In compatibility mode +(@pxref{Options, ,Command Line Options}), +if @code{FS} is the null string, then @code{gawk} will also +behave this way. 
+ +@node Command Line Field Separator, Field Splitting Summary, Single Character Fields, Field Separators +@subsection Setting @code{FS} from the Command Line +@cindex @code{-F} option +@cindex field separator, on command line +@cindex command line, setting @code{FS} on + +@code{FS} can be set on the command line. You use the @samp{-F} option to +do so. For example: + +@example +awk -F, '@var{program}' @var{input-files} +@end example + +@noindent +sets @code{FS} to be the @samp{,} character. Notice that the option uses +a capital @samp{F}. Contrast this with @samp{-f}, which specifies a file +containing an @code{awk} program. Case is significant in command line options: +the @samp{-F} and @samp{-f} options have nothing to do with each other. +You can use both options at the same time to set the @code{FS} variable +@emph{and} get an @code{awk} program from a file. + +The value used for the argument to @samp{-F} is processed in exactly the +same way as assignments to the built-in variable @code{FS}. This means that +if the field separator contains special characters, they must be escaped +appropriately. For example, to use a @samp{\} as the field separator, you +would have to type: + +@example +# same as FS = "\\" +awk -F\\\\ '@dots{}' files @dots{} +@end example + +@noindent +Since @samp{\} is used for quoting in the shell, @code{awk} will see +@samp{-F\\}. Then @code{awk} processes the @samp{\\} for escape +characters (@pxref{Escape Sequences}), finally yielding +a single @samp{\} to be used for the field separator. + +@cindex historical features +As a special case, in compatibility mode +(@pxref{Options, ,Command Line Options}), if the +argument to @samp{-F} is @samp{t}, then @code{FS} is set to the tab +character. This is because if you type @samp{-F\t} at the shell, +without any quotes, the @samp{\} gets deleted, so @code{awk} figures that you +really want your fields to be separated with tabs, and not @samp{t}s. 
+Use @samp{-v FS="t"} on the command line if you really do want to separate +your fields with @samp{t}s +(@pxref{Options, ,Command Line Options}). + +For example, let's use an @code{awk} program file called @file{baud.awk} +that contains the pattern @code{/300/}, and the action @samp{print $1}. +Here is the program: + +@example +/300/ @{ print $1 @} +@end example + +Let's also set @code{FS} to be the @samp{-} character, and run the +program on the file @file{BBS-list}. The following command prints a +list of the names of the bulletin boards that operate at 300 baud and +the first three digits of their phone numbers: + +@c tweaked to make the tex output look better in @smallbook +@example +@group +$ awk -F- -f baud.awk BBS-list +@print{} aardvark 555 +@print{} alpo +@print{} barfly 555 +@dots{} +@end group +@ignore +@print{} bites 555 +@print{} camelot 555 +@print{} core 555 +@print{} fooey 555 +@print{} foot 555 +@print{} macfoo 555 +@print{} sdace 555 +@print{} sabafoo 555 +@end ignore +@end example + +@noindent +Note the second line of output. In the original file +(@pxref{Sample Data Files, ,Data Files for the Examples}), +the second line looked like this: + +@example +alpo-net 555-3412 2400/1200/300 A +@end example + +The @samp{-} as part of the system's name was used as the field +separator, instead of the @samp{-} in the phone number that was +originally intended. This demonstrates why you have to be careful in +choosing your field and record separators. + +On many Unix systems, each user has a separate entry in the system password +file, one line per user. The information in these lines is separated +by colons. The first field is the user's logon name, and the second is +the user's encrypted password. 
A password file entry might look like this: + +@example +arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh +@end example + +The following program searches the system password file, and prints +the entries for users who have no password: + +@example +awk -F: '$2 == ""' /etc/passwd +@end example + +@node Field Splitting Summary, , Command Line Field Separator, Field Separators +@subsection Field Splitting Summary + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +According to the POSIX standard, @code{awk} is supposed to behave +as if each record is split into fields at the time that it is read. +In particular, this means that you can change the value of @code{FS} +after a record is read, and the value of the fields (i.e.@: how they were split) +should reflect the old value of @code{FS}, not the new one. + +@cindex dark corner +@cindex @code{sed} utility +@cindex stream editor +However, many implementations of @code{awk} do not work this way. Instead, +they defer splitting the fields until a field is actually +referenced. The fields will be split +using the @emph{current} value of @code{FS}! (d.c.) +This behavior can be difficult +to diagnose. The following example illustrates the difference +between the two methods. +(The @code{sed}@footnote{The @code{sed} utility is a ``stream editor.'' +Its behavior is also defined by the POSIX standard.} +command prints just the first line of @file{/etc/passwd}.) + +@example +sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}' +@end example + +@noindent +will usually print + +@example +root +@end example + +@noindent +on an incorrect implementation of @code{awk}, while @code{gawk} +will print something like + +@example +root:nSijPlPhZZwgE:0:0:Root:/: +@end example + +The following table summarizes how fields are split, based on the +value of @code{FS}. (@samp{==} means ``is equal to.'') + +@c @cartouche +@table @code +@item FS == " " +Fields are separated by runs of whitespace. 
Leading and trailing +whitespace are ignored. This is the default. + +@item FS == @var{any other single character} +Fields are separated by each occurrence of the character. Multiple +successive occurrences delimit empty fields, as do leading and +trailing occurrences. +The character can even be a regexp metacharacter; it does not need +to be escaped. + +@item FS == @var{regexp} +Fields are separated by occurrences of characters that match @var{regexp}. +Leading and trailing matches of @var{regexp} delimit empty fields. + +@item FS == "" +Each individual character in the record becomes a separate field. +@end table +@c @end cartouche + +@node Constant Size, Multiple Line, Field Separators, Reading Files +@section Reading Fixed-width Data + +(This section discusses an advanced, experimental feature. If you are +a novice @code{awk} user, you may wish to skip it on the first reading.) + +@code{gawk} version 2.13 introduced a new facility for dealing with +fixed-width fields with no distinctive field separator. Data of this +nature arises, for example, in the input for old FORTRAN programs where +numbers are run together; or in the output of programs that did not +anticipate the use of their output as input for other programs. + +An example of the latter is a table where all the columns are lined up by +the use of a variable number of spaces and @emph{empty fields are just +spaces}. Clearly, @code{awk}'s normal field splitting based on @code{FS} +will not work well in this case. Although a portable @code{awk} program +can use a series of @code{substr} calls on @code{$0} +(@pxref{String Functions, ,Built-in Functions for String Manipulation}), +this is awkward and inefficient for a large number of fields. + +The splitting of an input record into fixed-width fields is specified by +assigning a string containing space-separated numbers to the built-in +variable @code{FIELDWIDTHS}. Each number specifies the width of the field +@emph{including} columns between fields. 
If you want to ignore the columns +between fields, you can specify the width as a separate field that is +subsequently ignored. + +The following data is the output of the Unix @code{w} utility. It is useful +to illustrate the use of @code{FIELDWIDTHS}. + +@example +@group + 10:06pm up 21 days, 14:04, 23 users +User tty login@ idle JCPU PCPU what +hzuo ttyV0 8:58pm 9 5 vi p24.tex +hzang ttyV3 6:37pm 50 -csh +eklye ttyV5 9:53pm 7 1 em thes.tex +dportein ttyV6 8:17pm 1:47 -csh +gierd ttyD3 10:00pm 1 elm +dave ttyD4 9:47pm 4 4 w +brent ttyp0 26Jun91 4:46 26:46 4:41 bash +dave ttyq4 26Jun9115days 46 46 wnewmail +@end group +@end example + +The following program takes the above input, converts the idle time to +number of seconds and prints out the first two fields and the calculated +idle time. (This program uses a number of @code{awk} features that +haven't been introduced yet.) + +@example +@group +BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @} +NR > 2 @{ + idle = $4 + sub(/^ */, "", idle) # strip leading spaces + if (idle == "") + idle = 0 + if (idle ~ /:/) @{ + split(idle, t, ":") + idle = t[1] * 60 + t[2] + @} + if (idle ~ /days/) + idle *= 24 * 60 * 60 + + print $1, $2, idle +@} +@end group +@end example + +Here is the result of running the program on the data: + +@example +hzuo ttyV0 0 +hzang ttyV3 50 +eklye ttyV5 0 +dportein ttyV6 107 +gierd ttyD3 1 +dave ttyD4 0 +brent ttyp0 286 +dave ttyq4 1296000 +@end example + +Another (possibly more practical) example of fixed-width input data +would be the input from a deck of balloting cards. In some parts of +the United States, voters mark their choices by punching holes in computer +cards. These cards are then processed to count the votes for any particular +candidate or on any particular issue. Since a voter may choose not to +vote on some issue, any column on the card may be empty. An @code{awk} +program for processing such data could use the @code{FIELDWIDTHS} feature +to simplify reading the data. 
(Of course, getting @code{gawk} to run on +a system with card readers is another story!) + +@ignore +Exercise: Write a ballot card reading program +@end ignore + +Assigning a value to @code{FS} causes @code{gawk} to return to using +@code{FS} for field splitting. Use @samp{FS = FS} to make this happen, +without having to know the current value of @code{FS}. + +This feature is still experimental, and may evolve over time. +Note that in particular, @code{gawk} does not attempt to verify +the sanity of the values used in the value of @code{FIELDWIDTHS}. + +@node Multiple Line, Getline, Constant Size, Reading Files +@section Multiple-Line Records + +@cindex multiple line records +@cindex input, multiple line records +@cindex reading files, multiple line records +@cindex records, multiple line +In some data bases, a single line cannot conveniently hold all the +information in one entry. In such cases, you can use multi-line +records. + +The first step in doing this is to choose your data format: when records +are not defined as single lines, how do you want to define them? +What should separate records? + +One technique is to use an unusual character or string to separate +records. For example, you could use the formfeed character (written +@samp{\f} in @code{awk}, as in C) to separate them, making each record +a page of the file. To do this, just set the variable @code{RS} to +@code{"\f"} (a string containing the formfeed character). Any +other character could equally well be used, as long as it won't be part +of the data in a record. + +Another technique is to have blank lines separate records. By a special +dispensation, an empty string as the value of @code{RS} indicates that +records are separated by one or more blank lines. If you set @code{RS} +to the empty string, a record always ends at the first blank line +encountered. 
And the next record doesn't start until the first non-blank +line that follows---no matter how many blank lines appear in a row, they +are considered one record-separator. + +@cindex leftmost longest match +@cindex matching, leftmost longest +You can achieve the same effect as @samp{RS = ""} by assigning the +string @code{"\n\n+"} to @code{RS}. This regexp matches the newline +at the end of the record, and one or more blank lines after the record. +In addition, a regular expression always matches the longest possible +sequence when there is a choice +(@pxref{Leftmost Longest, ,How Much Text Matches?}). +So the next record doesn't start until +the first non-blank line that follows---no matter how many blank lines +appear in a row, they are considered one record-separator. + +@cindex dark corner +There is an important difference between @samp{RS = ""} and +@samp{RS = "\n\n+"}. In the first case, leading newlines in the input +data file are ignored, and if a file ends without extra blank lines +after the last record, the final newline is removed from the record. +In the second case, this special processing is not done (d.c.). + +Now that the input is separated into records, the second step is to +separate the fields in the record. One way to do this is to divide each +of the lines into fields in the normal manner. This happens by default +as the result of a special feature: when @code{RS} is set to the empty +string, the newline character @emph{always} acts as a field separator. +This is in addition to whatever field separations result from @code{FS}. + +The original motivation for this special exception was probably to provide +useful behavior in the default case (i.e.@: @code{FS} is equal +to @w{@code{" "}}). This feature can be a problem if you really don't +want the newline character to separate fields, since there is no way to +prevent it. 
However, you can work around this by using the @code{split} +function to break up the record manually +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). + +Another way to separate fields is to +put each field on a separate line: to do this, just set the +variable @code{FS} to the string @code{"\n"}. (This simple regular +expression matches a single newline.) + +A practical example of a data file organized this way might be a mailing +list, where each entry is separated by blank lines. If we have a mailing +list in a file named @file{addresses}, that looks like this: + +@example +Jane Doe +123 Main Street +Anywhere, SE 12345-6789 + +John Smith +456 Tree-lined Avenue +Smallville, MW 98765-4321 + +@dots{} +@end example + +@noindent +A simple program to process this file would look like this: + +@example +@group +# addrs.awk --- simple mailing list program + +# Records are separated by blank lines. +# Each line is one field. +BEGIN @{ RS = "" ; FS = "\n" @} + +@{ + print "Name is:", $1 + print "Address is:", $2 + print "City and State are:", $3 + print "" +@} +@end group +@end example + +Running the program produces the following output: + +@example +@group +$ awk -f addrs.awk addresses +@print{} Name is: Jane Doe +@print{} Address is: 123 Main Street +@print{} City and State are: Anywhere, SE 12345-6789 +@print{} +@end group +@group +@print{} Name is: John Smith +@print{} Address is: 456 Tree-lined Avenue +@print{} City and State are: Smallville, MW 98765-4321 +@print{} +@dots{} +@end group +@end example + +@xref{Labels Program, ,Printing Mailing Labels}, for a more realistic +program that deals with address lists. + +The following table summarizes how records are split, based on the +value of @code{RS}. (@samp{==} means ``is equal to.'') + +@c @cartouche +@table @code +@item RS == "\n" +Records are separated by the newline character (@samp{\n}). In effect, +every line in the data file is a separate record, including blank lines. 
+This is the default. + +@item RS == @var{any single character} +Records are separated by each occurrence of the character. Multiple +successive occurrences delimit empty records. + +@item RS == "" +Records are separated by runs of blank lines. The newline character +always serves as a field separator, in addition to whatever value +@code{FS} may have. Leading and trailing newlines in a file are ignored. + +@item RS == @var{regexp} +Records are separated by occurrences of characters that match @var{regexp}. +Leading and trailing matches of @var{regexp} delimit empty records. +@end table +@c @end cartouche + +@vindex RT +In all cases, @code{gawk} sets @code{RT} to the input text that matched the +value specified by @code{RS}. + +@node Getline, , Multiple Line, Reading Files +@section Explicit Input with @code{getline} + +@findex getline +@cindex input, explicit +@cindex explicit input +@cindex input, @code{getline} command +@cindex reading files, @code{getline} command +So far we have been getting our input data from @code{awk}'s main +input stream---either the standard input (usually your terminal, sometimes +the output from another program) or from the +files specified on the command line. The @code{awk} language has a +special built-in command called @code{getline} that +can be used to read input under your explicit control. + +@menu +* Getline Intro:: Introduction to the @code{getline} function. +* Plain Getline:: Using @code{getline} with no arguments. +* Getline/Variable:: Using @code{getline} into a variable. +* Getline/File:: Using @code{getline} from a file. +* Getline/Variable/File:: Using @code{getline} into a variable from a + file. +* Getline/Pipe:: Using @code{getline} from a pipe. +* Getline/Variable/Pipe:: Using @code{getline} into a variable from a + pipe. +* Getline Summary:: Summary Of @code{getline} Variants. 
+@end menu + +@node Getline Intro, Plain Getline, Getline, Getline +@subsection Introduction to @code{getline} + +This command is used in several different ways, and should @emph{not} be +used by beginners. It is covered here because this is the chapter on input. +The examples that follow the explanation of the @code{getline} command +include material that has not been covered yet. Therefore, come back +and study the @code{getline} command @emph{after} you have reviewed the +rest of this @value{DOCUMENT} and have a good knowledge of how @code{awk} works. + +@vindex ERRNO +@cindex differences between @code{gawk} and @code{awk} +@cindex @code{getline}, return values +@code{getline} returns one if it finds a record, and zero if the end of the +file is encountered. If there is some error in getting a record, such +as a file that cannot be opened, then @code{getline} returns @minus{}1. +In this case, @code{gawk} sets the variable @code{ERRNO} to a string +describing the error that occurred. + +In the following examples, @var{command} stands for a string value that +represents a shell command. + +@node Plain Getline, Getline/Variable, Getline Intro, Getline +@subsection Using @code{getline} with No Arguments + +The @code{getline} command can be used without arguments to read input +from the current input file. All it does in this case is read the next +input record and split it up into fields. This is useful if you've +finished processing the current record, but you want to do some special +processing @emph{right now} on the next record. 
Here's an +example: + +@example +@group +awk '@{ + if ((t = index($0, "/*")) != 0) @{ + # value will be "" if t is 1 + tmp = substr($0, 1, t - 1) + u = index(substr($0, t + 2), "*/") + while (u == 0) @{ + if (getline <= 0) @{ + m = "unexpected EOF or error" + m = (m ": " ERRNO) + print m > "/dev/stderr" + exit + @} + t = -1 + u = index($0, "*/") + @} +@end group +@group + # substr expression will be "" if */ + # occurred at end of line + $0 = tmp substr($0, t + u + 3) + @} + print $0 +@}' +@end group +@end example + +This @code{awk} program deletes all C-style comments, @samp{/* @dots{} +*/}, from the input. By replacing the @samp{print $0} with other +statements, you could perform more complicated processing on the +decommented input, like searching for matches of a regular +expression. This program has a subtle problem---it does not work if one +comment ends and another begins on the same line. + +@ignore +Exercise, +write a program that does handle multiple comments on the line. +@end ignore + +This form of the @code{getline} command sets @code{NF} (the number of +fields; @pxref{Fields, ,Examining Fields}), @code{NR} (the number of +records read so far; @pxref{Records, ,How Input is Split into Records}), +@code{FNR} (the number of records read from this input file), and the +value of @code{$0}. + +@cindex dark corner +@strong{Note:} the new value of @code{$0} is used in testing +the patterns of any subsequent rules. The original value +of @code{$0} that triggered the rule which executed @code{getline} +is lost (d.c.). +By contrast, the @code{next} statement reads a new record +but immediately begins processing it normally, starting with the first +rule in the program. @xref{Next Statement, ,The @code{next} Statement}. + +@node Getline/Variable, Getline/File, Plain Getline, Getline +@subsection Using @code{getline} Into a Variable + +You can use @samp{getline @var{var}} to read the next record from +@code{awk}'s input into the variable @var{var}. 
No other processing is +done. + +For example, suppose the next line is a comment, or a special string, +and you want to read it, without triggering +any rules. This form of @code{getline} allows you to read that line +and store it in a variable so that the main +read-a-line-and-check-each-rule loop of @code{awk} never sees it. + +The following example swaps every two lines of input. For example, given: + +@example +wan +tew +free +phore +@end example + +@noindent +it outputs: + +@example +tew +wan +phore +free +@end example + +@noindent +Here's the program: + +@example +@group +awk '@{ + if ((getline tmp) > 0) @{ + print tmp + print $0 + @} else + print $0 +@}' +@end group +@end example + +The @code{getline} command used in this way sets only the variables +@code{NR} and @code{FNR} (and of course, @var{var}). The record is not +split into fields, so the values of the fields (including @code{$0}) and +the value of @code{NF} do not change. + +@node Getline/File, Getline/Variable/File, Getline/Variable, Getline +@subsection Using @code{getline} from a File + +@cindex input redirection +@cindex redirection of input +Use @samp{getline < @var{file}} to read +the next record from the file +@var{file}. Here @var{file} is a string-valued expression that +specifies the file name. @samp{< @var{file}} is called a @dfn{redirection} +since it directs input to come from a different place. + +For example, the following +program reads its input record from the file @file{secondary.input} when it +encounters a first field with a value equal to 10 in the current input +file. + +@example +@group +awk '@{ + if ($1 == 10) @{ + getline < "secondary.input" + print + @} else + print +@}' +@end group +@end example + +Since the main input stream is not used, the values of @code{NR} and +@code{FNR} are not changed. But the record read is split into fields in +the normal manner, so the values of @code{$0} and other fields are +changed. So is the value of @code{NF}. 
+
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{getline < @var{expression}} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{getline < dir "/" file} is ambiguous
+because the concatenation operator is not parenthesized, and you should
+write it as @samp{getline < (dir "/" file)} if you want your program
+to be portable to other @code{awk} implementations.
+
+@node Getline/Variable/File, Getline/Pipe, Getline/File, Getline
+@subsection Using @code{getline} Into a Variable from a File
+
+Use @samp{getline @var{var} < @var{file}} to read input from
+the file
+@var{file} and put it in the variable @var{var}. As above, @var{file}
+is a string-valued expression that specifies the file from which to read.
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields. The only variable
+changed is @var{var}.
+
+@ifinfo
+@c Thanks to Paul Eggert for initial wording here
+According to POSIX, @samp{getline @var{var} < @var{expression}} is ambiguous if
+@var{expression} contains unparenthesized operators other than
+@samp{$}; for example, @samp{getline @var{var} < dir "/" file} is ambiguous
+because the concatenation operator is not parenthesized, and you should
+write it as @samp{getline @var{var} < (dir "/" file)} if you want your
+program to be portable to other @code{awk} implementations.
+@end ifinfo
+
+For example, the following program copies all the input files to the
+output, except for records that say @w{@samp{@@include @var{filename}}}.
+Such a record is replaced by the contents of the file
+@var{filename}. 
+ +@example +@group +awk '@{ + if (NF == 2 && $1 == "@@include") @{ + while ((getline line < $2) > 0) + print line + close($2) + @} else + print +@}' +@end group +@end example + +Note here how the name of the extra input file is not built into +the program; it is taken directly from the data, from the second field on +the @samp{@@include} line. + +The @code{close} function is called to ensure that if two identical +@samp{@@include} lines appear in the input, the entire specified file is +included twice. +@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}. + +One deficiency of this program is that it does not process nested +@samp{@@include} statements +(@samp{@@include} statements in included files) +the way a true macro preprocessor would. +@xref{Igawk Program, ,An Easy Way to Use Library Functions}, for a program +that does handle nested @samp{@@include} statements. + +@node Getline/Pipe, Getline/Variable/Pipe, Getline/Variable/File, Getline +@subsection Using @code{getline} from a Pipe + +@cindex input pipeline +@cindex pipeline, input +You can pipe the output of a command into @code{getline}, using +@samp{@var{command} | getline}. In +this case, the string @var{command} is run as a shell command and its output +is piped into @code{awk} to be used as input. This form of @code{getline} +reads one record at a time from the pipe. + +For example, the following program copies its input to its output, except for +lines that begin with @samp{@@execute}, which are replaced by the output +produced by running the rest of the line as a shell command: + +@example +@group +awk '@{ + if ($1 == "@@execute") @{ + tmp = substr($0, 10) + while ((tmp | getline) > 0) + print + close(tmp) + @} else + print +@}' +@end group +@end example + +@noindent +The @code{close} function is called to ensure that if two identical +@samp{@@execute} lines appear in the input, the command is run for +each one. 
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}. +@c Exercise!! +@c This example is unrealistic, since you could just use system + +@c NEEDED +@page +Given the input: + +@example +@group +foo +bar +baz +@@execute who +bletch +@end group +@end example + +@noindent +the program might produce: + +@example +@group +foo +bar +baz +arnold ttyv0 Jul 13 14:22 +miriam ttyp0 Jul 13 14:23 (murphy:0) +bill ttyp1 Jul 13 14:23 (murphy:0) +bletch +@end group +@end example + +@noindent +Notice that this program ran the command @code{who} and printed the result. +(If you try this program yourself, you will of course get different results, +showing you who is logged in on your system.) + +This variation of @code{getline} splits the record into fields, sets the +value of @code{NF} and recomputes the value of @code{$0}. The values of +@code{NR} and @code{FNR} are not changed. + +@c Thanks to Paul Eggert for initial wording here +According to POSIX, @samp{@var{expression} | getline} is ambiguous if +@var{expression} contains unparenthesized operators other than +@samp{$}; for example, @samp{"echo " "date" | getline} is ambiguous +because the concatenation operator is not parenthesized, and you should +write it as @samp{("echo " "date") | getline} if you want your program +to be portable to other @code{awk} implementations. + +@node Getline/Variable/Pipe, Getline Summary, Getline/Pipe, Getline +@subsection Using @code{getline} Into a Variable from a Pipe + +When you use @samp{@var{command} | getline @var{var}}, the +output of the command @var{command} is sent through a pipe to +@code{getline} and into the variable @var{var}. For example, the +following program reads the current date and time into the variable +@code{current_time}, using the @code{date} utility, and then +prints it. 
+ +@example +@group +awk 'BEGIN @{ + "date" | getline current_time + close("date") + print "Report printed on " current_time +@}' +@end group +@end example + +In this version of @code{getline}, none of the built-in variables are +changed, and the record is not split into fields. + +@ifinfo +@c Thanks to Paul Eggert for initial wording here +According to POSIX, @samp{@var{expression} | getline @var{var}} is ambiguous if +@var{expression} contains unparenthesized operators other than +@samp{$}; for example, @samp{"echo " "date" | getline @var{var}} is ambiguous +because the concatenation operator is not parenthesized, and you should +write it as @samp{("echo " "date") | getline @var{var}} if you want your +program to be portable to other @code{awk} implementations. +@end ifinfo + +@node Getline Summary, , Getline/Variable/Pipe, Getline +@subsection Summary of @code{getline} Variants + +With all the forms of @code{getline}, even though @code{$0} and @code{NF}, +may be updated, the record will not be tested against all the patterns +in the @code{awk} program, in the way that would happen if the record +were read normally by the main processing loop of @code{awk}. However +the new record is tested against any subsequent rules. + +@cindex differences between @code{gawk} and @code{awk} +@cindex limitations +@cindex implementation limits +Many @code{awk} implementations limit the number of pipelines an @code{awk} +program may have open to just one! In @code{gawk}, there is no such limit. +You can open as many pipelines as the underlying operating system will +permit. + +@vindex FILENAME +@cindex dark corner +@cindex @code{getline}, setting @code{FILENAME} +@cindex @code{FILENAME}, being set by @code{getline} +An interesting side-effect occurs if you use @code{getline} (without a +redirection) inside a @code{BEGIN} rule. 
Since an unredirected @code{getline} +reads from the command line data files, the first @code{getline} command +causes @code{awk} to set the value of @code{FILENAME}. Normally, +@code{FILENAME} does not have a value inside @code{BEGIN} rules, since you +have not yet started to process the command line data files (d.c.). +(@xref{BEGIN/END, , The @code{BEGIN} and @code{END} Special Patterns}, +also @pxref{Auto-set, , Built-in Variables that Convey Information}.) + +The following table summarizes the six variants of @code{getline}, +listing which built-in variables are set by each one. + +@c @cartouche +@table @code +@item getline +sets @code{$0}, @code{NF}, @code{FNR}, and @code{NR}. + +@item getline @var{var} +sets @var{var}, @code{FNR}, and @code{NR}. + +@item getline < @var{file} +sets @code{$0}, and @code{NF}. + +@item getline @var{var} < @var{file} +sets @var{var}. + +@item @var{command} | getline +sets @code{$0}, and @code{NF}. + +@item @var{command} | getline @var{var} +sets @var{var}. +@end table +@c @end cartouche + +@node Printing, Expressions, Reading Files, Top +@chapter Printing Output + +@cindex printing +@cindex output +One of the most common actions is to @dfn{print}, or output, +some or all of the input. You use the @code{print} statement +for simple output. You use the @code{printf} statement +for fancier formatting. Both are described in this chapter. + +@menu +* Print:: The @code{print} statement. +* Print Examples:: Simple examples of @code{print} statements. +* Output Separators:: The output separators and how to change them. +* OFMT:: Controlling Numeric Output With @code{print}. +* Printf:: The @code{printf} statement. +* Redirection:: How to redirect output to multiple files and + pipes. +* Special Files:: File name interpretation in @code{gawk}. + @code{gawk} allows access to inherited file + descriptors. +* Close Files And Pipes:: Closing Input and Output Files and Pipes. 
+@end menu + +@node Print, Print Examples, Printing, Printing +@section The @code{print} Statement +@cindex @code{print} statement + +The @code{print} statement does output with simple, standardized +formatting. You specify only the strings or numbers to be printed, in a +list separated by commas. They are output, separated by single spaces, +followed by a newline. The statement looks like this: + +@example +print @var{item1}, @var{item2}, @dots{} +@end example + +@noindent +The entire list of items may optionally be enclosed in parentheses. The +parentheses are necessary if any of the item expressions uses the @samp{>} +relational operator; otherwise it could be confused with a redirection +(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}). + +The items to be printed can be constant strings or numbers, fields of the +current record (such as @code{$1}), variables, or any @code{awk} +expressions. +Numeric values are converted to strings, and then printed. + +The @code{print} statement is completely general for +computing @emph{what} values to print. However, with two exceptions, +you cannot specify @emph{how} to print them---how many +columns, whether to use exponential notation or not, and so on. +(For the exceptions, @pxref{Output Separators}, and +@ref{OFMT, ,Controlling Numeric Output with @code{print}}.) +For that, you need the @code{printf} statement +(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}). + +The simple statement @samp{print} with no items is equivalent to +@samp{print $0}: it prints the entire current record. To print a blank +line, use @samp{print ""}, where @code{""} is the empty string. + +To print a fixed piece of text, use a string constant such as +@w{@code{"Don't Panic"}} as one item. If you forget to use the +double-quote characters, your text will be taken as an @code{awk} +expression, and you will probably get an error. Keep in mind that a +space is printed between any two items. 
+ +Each @code{print} statement makes at least one line of output. But it +isn't limited to one line. If an item value is a string that contains a +newline, the newline is output along with the rest of the string. A +single @code{print} can make any number of lines this way. + +@node Print Examples, Output Separators, Print, Printing +@section Examples of @code{print} Statements + +Here is an example of printing a string that contains embedded newlines +(the @samp{\n} is an escape sequence, used to represent the newline +character; see @ref{Escape Sequences}): + +@example +@group +$ awk 'BEGIN @{ print "line one\nline two\nline three" @}' +@print{} line one +@print{} line two +@print{} line three +@end group +@end example + +Here is an example that prints the first two fields of each input record, +with a space between them: + +@example +@group +$ awk '@{ print $1, $2 @}' inventory-shipped +@print{} Jan 13 +@print{} Feb 15 +@print{} Mar 15 +@dots{} +@end group +@end example + +@cindex common mistakes +@cindex mistakes, common +@cindex errors, common +A common mistake in using the @code{print} statement is to omit the comma +between two items. This often has the effect of making the items run +together in the output, with no space. The reason for this is that +juxtaposing two string expressions in @code{awk} means to concatenate +them. Here is the same program, without the comma: + +@example +@group +$ awk '@{ print $1 $2 @}' inventory-shipped +@print{} Jan13 +@print{} Feb15 +@print{} Mar15 +@dots{} +@end group +@end example + +To someone unfamiliar with the file @file{inventory-shipped}, neither +example's output makes much sense. A heading line at the beginning +would make it clearer. Let's add some headings to our table of months +(@code{$1}) and green crates shipped (@code{$2}). 
We do this using the +@code{BEGIN} pattern +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}) +to force the headings to be printed only once: + +@example +awk 'BEGIN @{ print "Month Crates" + print "----- ------" @} + @{ print $1, $2 @}' inventory-shipped +@end example + +@noindent +Did you already guess what happens? When run, the program prints +the following: + +@example +@group +Month Crates +----- ------ +Jan 13 +Feb 15 +Mar 15 +@dots{} +@end group +@end example + +@noindent +The headings and the table data don't line up! We can fix this by printing +some spaces between the two fields: + +@example +awk 'BEGIN @{ print "Month Crates" + print "----- ------" @} + @{ print $1, " ", $2 @}' inventory-shipped +@end example + +You can imagine that this way of lining up columns can get pretty +complicated when you have many columns to fix. Counting spaces for two +or three columns can be simple, but more than this and you can get +lost quite easily. This is why the @code{printf} statement was +created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}); +one of its specialties is lining up columns of data. + +@cindex line continuation +As a side point, +you can continue either a @code{print} or @code{printf} statement simply +by putting a newline after any comma +(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}). + +@node Output Separators, OFMT, Print Examples, Printing +@section Output Separators + +@cindex output field separator, @code{OFS} +@cindex output record separator, @code{ORS} +@vindex OFS +@vindex ORS +As mentioned previously, a @code{print} statement contains a list +of items, separated by commas. In the output, the items are normally +separated by single spaces. This need not be the case; a +single space is only the default. You can specify any string of +characters to use as the @dfn{output field separator} by setting the +built-in variable @code{OFS}. 
The initial value of this variable +is the string @w{@code{" "}}, that is, a single space. + +The output from an entire @code{print} statement is called an +@dfn{output record}. Each @code{print} statement outputs one output +record and then outputs a string called the @dfn{output record separator}. +The built-in variable @code{ORS} specifies this string. The initial +value of @code{ORS} is the string @code{"\n"}, i.e.@: a newline +character; thus, normally each @code{print} statement makes a separate line. + +You can change how output fields and records are separated by assigning +new values to the variables @code{OFS} and/or @code{ORS}. The usual +place to do this is in the @code{BEGIN} rule +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}), so +that it happens before any input is processed. You may also do this +with assignments on the command line, before the names of your input +files, or using the @samp{-v} command line option +(@pxref{Options, ,Command Line Options}). + +@ignore +Exercise, +Rewrite the +@example +awk 'BEGIN @{ print "Month Crates" + print "----- ------" @} + @{ print $1, " ", $2 @}' inventory-shipped +@end example +program by using a new value of @code{OFS}. +@end ignore + +The following example prints the first and second fields of each input +record separated by a semicolon, with a blank line added after each +line: + +@example +@group +$ awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @} +> @{ print $1, $2 @}' BBS-list +@print{} aardvark;555-5553 +@print{} +@print{} alpo-net;555-3412 +@print{} +@print{} barfly;555-7685 +@dots{} +@end group +@end example + +If the value of @code{ORS} does not contain a newline, all your output +will be run together on a single line, unless you output newlines some +other way. 
+ +@node OFMT, Printf, Output Separators, Printing +@section Controlling Numeric Output with @code{print} +@vindex OFMT +@cindex numeric output format +@cindex format, numeric output +@cindex output format specifier, @code{OFMT} +When you use the @code{print} statement to print numeric values, +@code{awk} internally converts the number to a string of characters, +and prints that string. @code{awk} uses the @code{sprintf} function +to do this conversion +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +For now, it suffices to say that the @code{sprintf} +function accepts a @dfn{format specification} that tells it how to format +numbers (or strings), and that there are a number of different ways in which +numbers can be formatted. The different format specifications are discussed +more fully in +@ref{Control Letters, , Format-Control Letters}. + +The built-in variable @code{OFMT} contains the default format specification +that @code{print} uses with @code{sprintf} when it wants to convert a +number to a string for printing. +The default value of @code{OFMT} is @code{"%.6g"}. +By supplying different format specifications +as the value of @code{OFMT}, you can change how @code{print} will print +your numbers. As a brief example: + +@example +@group +$ awk 'BEGIN @{ +> OFMT = "%.0f" # print numbers as integers (rounds) +> print 17.23 @}' +@print{} 17 +@end group +@end example + +@noindent +@cindex dark corner +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +According to the POSIX standard, @code{awk}'s behavior will be undefined +if @code{OFMT} contains anything but a floating point conversion specification +(d.c.). + +@node Printf, Redirection, OFMT, Printing +@section Using @code{printf} Statements for Fancier Printing +@cindex formatted output +@cindex output, formatted + +If you want more precise control over the output format than +@code{print} gives you, use @code{printf}. 
With @code{printf} you can +specify the width to use for each item, and you can specify various +formatting choices for numbers (such as what radix to use, whether to +print an exponent, whether to print a sign, and how many digits to print +after the decimal point). You do this by supplying a string, called +the @dfn{format string}, which controls how and where to print the other +arguments. + +@menu +* Basic Printf:: Syntax of the @code{printf} statement. +* Control Letters:: Format-control letters. +* Format Modifiers:: Format-specification modifiers. +* Printf Examples:: Several examples. +@end menu + +@node Basic Printf, Control Letters, Printf, Printf +@subsection Introduction to the @code{printf} Statement + +@cindex @code{printf} statement, syntax of +The @code{printf} statement looks like this: + +@example +printf @var{format}, @var{item1}, @var{item2}, @dots{} +@end example + +@noindent +The entire list of arguments may optionally be enclosed in parentheses. The +parentheses are necessary if any of the item expressions use the @samp{>} +relational operator; otherwise it could be confused with a redirection +(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}). + +@cindex format string +The difference between @code{printf} and @code{print} is the @var{format} +argument. This is an expression whose value is taken as a string; it +specifies how to output each of the other arguments. It is called +the @dfn{format string}. + +The format string is very similar to that in the ANSI C library function +@code{printf}. Most of @var{format} is text to be output verbatim. +Scattered among this text are @dfn{format specifiers}, one per item. +Each format specifier says to output the next item in the argument list +at that place in the format. + +The @code{printf} statement does not automatically append a newline to its +output. It outputs only what the format string specifies. So if you want +a newline, you must include one in the format string. 
The output separator +variables @code{OFS} and @code{ORS} have no effect on @code{printf} +statements. For example: + +@example +@group +BEGIN @{ + ORS = "\nOUCH!\n"; OFS = "!" + msg = "Don't Panic!"; printf "%s\n", msg +@} +@end group +@end example + +This program still prints the familiar @samp{Don't Panic!} message. + +@node Control Letters, Format Modifiers, Basic Printf, Printf +@subsection Format-Control Letters +@cindex @code{printf}, format-control characters +@cindex format specifier + +A format specifier starts with the character @samp{%} and ends with a +@dfn{format-control letter}; it tells the @code{printf} statement how +to output one item. (If you actually want to output a @samp{%}, write +@samp{%%}.) The format-control letter specifies what kind of value to +print. The rest of the format specifier is made up of optional +@dfn{modifiers} which are parameters to use, such as the field width. + +Here is a list of the format-control letters: + +@table @code +@item c +This prints a number as an ASCII character. Thus, @samp{printf "%c", +65} outputs the letter @samp{A}. The output for a string value is +the first character of the string. + +@item d +@itemx i +These are equivalent. They both print a decimal integer. +The @samp{%i} specification is for compatibility with ANSI C. + +@item e +@itemx E +This prints a number in scientific (exponential) notation. +For example, + +@example +printf "%4.3e\n", 1950 +@end example + +@noindent +prints @samp{1.950e+03}, with a total of four significant figures of +which three follow the decimal point. The @samp{4.3} are modifiers, +discussed below. @samp{%E} uses @samp{E} instead of @samp{e} in the output. + +@item f +This prints a number in floating point notation. +For example, + +@example +printf "%4.3f", 1950 +@end example + +@noindent +prints @samp{1950.000}, with a total of four significant figures of +which three follow the decimal point. The @samp{4.3} are modifiers, +discussed below. 
+ +@item g +@itemx G +This prints a number in either scientific notation or floating point +notation, whichever uses fewer characters. If the result is printed in +scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}. + +@item o +This prints an unsigned octal integer. +(In octal, or base-eight notation, the digits run from @samp{0} to @samp{7}; +the decimal number eight is represented as @samp{10} in octal.) + +@item s +This prints a string. + +@item x +@itemx X +This prints an unsigned hexadecimal integer. +(In hexadecimal, or base-16 notation, the digits are @samp{0} through @samp{9} +and @samp{a} through @samp{f}. The hexadecimal digit @samp{f} represents +the decimal number 15.) @samp{%X} uses the letters @samp{A} through @samp{F} +instead of @samp{a} through @samp{f}. + +@item % +This isn't really a format-control letter, but it does have a meaning +when used after a @samp{%}: the sequence @samp{%%} outputs one +@samp{%}. It does not consume an argument, and it ignores any +modifiers. +@end table + +@cindex dark corner +When using the integer format-control letters for values that are outside +the range of a C @code{long} integer, @code{gawk} will switch to the +@samp{%g} format specifier. Other versions of @code{awk} may print +invalid values, or do something else entirely (d.c.). + +@node Format Modifiers, Printf Examples, Control Letters, Printf +@subsection Modifiers for @code{printf} Formats + +@cindex @code{printf}, modifiers +@cindex modifiers (in format specifiers) +A format specification can also include @dfn{modifiers} that can control +how much of the item's value is printed and how much space it gets. The +modifiers come between the @samp{%} and the format-control letter. +In the examples below, we use the bullet symbol ``@bullet{}'' to represent +spaces in the output. 
Here are the possible modifiers, in the order in +which they may appear: + +@table @code +@item - +The minus sign, used before the width modifier (see below), +says to left-justify +the argument within its specified width. Normally the argument +is printed right-justified in the specified width. Thus, + +@example +printf "%-4s", "foo" +@end example + +@noindent +prints @samp{foo@bullet{}}. + +@item @var{space} +For numeric conversions, prefix positive values with a space, and +negative values with a minus sign. + +@item + +The plus sign, used before the width modifier (see below), +says to always supply a sign for numeric conversions, even if the data +to be formatted is positive. The @samp{+} overrides the space modifier. + +@item # +Use an ``alternate form'' for certain control letters. +For @samp{%o}, supply a leading zero. +For @samp{%x}, and @samp{%X}, supply a leading @samp{0x} or @samp{0X} for +a non-zero result. +For @samp{%e}, @samp{%E}, and @samp{%f}, the result will always contain a +decimal point. +For @samp{%g}, and @samp{%G}, trailing zeros are not removed from the result. + +@cindex dark corner +@item 0 +A leading @samp{0} (zero) acts as a flag, that indicates output should be +padded with zeros instead of spaces. +This applies even to non-numeric output formats (d.c.). +This flag only has an effect when the field width is wider than the +value to be printed. + +@item @var{width} +This is a number specifying the desired minimum width of a field. Inserting any +number between the @samp{%} sign and the format control character forces the +field to be expanded to this width. The default way to do this is to +pad with spaces on the left. For example, + +@example +printf "%4s", "foo" +@end example + +@noindent +prints @samp{@bullet{}foo}. + +The value of @var{width} is a minimum width, not a maximum. If the item +value requires more than @var{width} characters, it can be as wide as +necessary. 
Thus, + +@example +printf "%4s", "foobar" +@end example + +@noindent +prints @samp{foobar}. + +Preceding the @var{width} with a minus sign causes the output to be +padded with spaces on the right, instead of on the left. + +@item .@var{prec} +This is a number that specifies the precision to use when printing. +For the @samp{e}, @samp{E}, and @samp{f} formats, this specifies the +number of digits you want printed to the right of the decimal point. +For the @samp{g}, and @samp{G} formats, it specifies the maximum number +of significant digits. For the @samp{d}, @samp{o}, @samp{i}, @samp{u}, +@samp{x}, and @samp{X} formats, it specifies the minimum number of +digits to print. For a string, it specifies the maximum number of +characters from the string that should be printed. Thus, + +@example +printf "%.4s", "foobar" +@end example + +@noindent +prints @samp{foob}. +@end table + +The C library @code{printf}'s dynamic @var{width} and @var{prec} +capability (for example, @code{"%*.*s"}) is supported. Instead of +supplying explicit @var{width} and/or @var{prec} values in the format +string, you pass them in the argument list. For example: + +@example +w = 5 +p = 3 +s = "abcdefg" +printf "%*.*s\n", w, p, s +@end example + +@noindent +is exactly equivalent to + +@example +s = "abcdefg" +printf "%5.3s\n", s +@end example + +@noindent +Both programs output @samp{@w{@bullet{}@bullet{}abc}}. + +Earlier versions of @code{awk} did not support this capability. +If you must use such a version, you may simulate this feature by using +concatenation to build up the format string, like so: + +@example +w = 5 +p = 3 +s = "abcdefg" +printf "%" w "." p "s\n", s +@end example + +@noindent +This is not particularly easy to read, but it does work. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +C programmers may be used to supplying additional @samp{l} and @samp{h} +flags in @code{printf} format strings. These are not valid in @code{awk}. 
+Most @code{awk} implementations silently ignore these flags. +If @samp{--lint} is provided on the command line +(@pxref{Options, ,Command Line Options}), +@code{gawk} will warn about their use. If @samp{--posix} is supplied, +their use is a fatal error. + +@node Printf Examples, , Format Modifiers, Printf +@subsection Examples Using @code{printf} + +Here is how to use @code{printf} to make an aligned table: + +@example +awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list +@end example + +@noindent +prints the names of bulletin boards (@code{$1}) of the file +@file{BBS-list} as a string of 10 characters, left justified. It also +prints the phone numbers (@code{$2}) afterward on the line. This +produces an aligned two-column table of names and phone numbers: + +@example +@group +$ awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list +@print{} aardvark 555-5553 +@print{} alpo-net 555-3412 +@print{} barfly 555-7685 +@print{} bites 555-1675 +@print{} camelot 555-0542 +@print{} core 555-2912 +@print{} fooey 555-1234 +@print{} foot 555-6699 +@print{} macfoo 555-6480 +@print{} sdace 555-3430 +@print{} sabafoo 555-2127 +@end group +@end example + +Did you notice that we did not specify that the phone numbers be printed +as numbers? They had to be printed as strings because the numbers are +separated by a dash. +If we had tried to print the phone numbers as numbers, all we would have +gotten would have been the first three digits, @samp{555}. +This would have been pretty confusing. + +We did not specify a width for the phone numbers because they are the +last things on their lines. We don't need to put spaces after them. + +We could make our table look even nicer by adding headings to the tops +of the columns. 
To do this, we use the @code{BEGIN} pattern +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}) +to force the header to be printed only once, at the beginning of +the @code{awk} program: + +@example +@group +awk 'BEGIN @{ print "Name Number" + print "---- ------" @} + @{ printf "%-10s %s\n", $1, $2 @}' BBS-list +@end group +@end example + +Did you notice that we mixed @code{print} and @code{printf} statements in +the above example? We could have used just @code{printf} statements to get +the same results: + +@example +@group +awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number" + printf "%-10s %s\n", "----", "------" @} + @{ printf "%-10s %s\n", $1, $2 @}' BBS-list +@end group +@end example + +@noindent +By printing each column heading with the same format specification +used for the elements of the column, we have made sure that the headings +are aligned just like the columns. + +The fact that the same format specification is used three times can be +emphasized by storing it in a variable, like this: + +@example +@group +awk 'BEGIN @{ format = "%-10s %s\n" + printf format, "Name", "Number" + printf format, "----", "------" @} + @{ printf format, $1, $2 @}' BBS-list +@end group +@end example + +@c !!! exercise +See if you can use the @code{printf} statement to line up the headings and +table data for our @file{inventory-shipped} example covered earlier in the +section on the @code{print} statement +(@pxref{Print, ,The @code{print} Statement}). + +@node Redirection, Special Files, Printf, Printing +@section Redirecting Output of @code{print} and @code{printf} + +@cindex output redirection +@cindex redirection of output +So far we have been dealing only with output that prints to the standard +output, usually your terminal. Both @code{print} and @code{printf} can +also send their output to other places. +This is called @dfn{redirection}. + +A redirection appears after the @code{print} or @code{printf} statement. 
+Redirections in @code{awk} are written just like redirections in shell +commands, except that they are written inside the @code{awk} program. + +There are three forms of output redirection: output to a file, +output appended to a file, and output through a pipe to another +command. +They are all shown for +the @code{print} statement, but they work identically for @code{printf} +also. + +@table @code +@item print @var{items} > @var{output-file} +This type of redirection prints the items into the output file +@var{output-file}. The file name @var{output-file} can be any +expression. Its value is changed to a string and then used as a +file name (@pxref{Expressions}). + +When this type of redirection is used, the @var{output-file} is erased +before the first output is written to it. Subsequent writes +to the same @var{output-file} do not +erase @var{output-file}, but append to it. If @var{output-file} does +not exist, then it is created. + +For example, here is how an @code{awk} program can write a list of +BBS names to a file @file{name-list} and a list of phone numbers to a +file @file{phone-list}. Each output file contains one name or number +per line. + +@example +@group +$ awk '@{ print $2 > "phone-list" +> print $1 > "name-list" @}' BBS-list +@end group +@group +$ cat phone-list +@print{} 555-5553 +@print{} 555-3412 +@dots{} +@end group +@group +$ cat name-list +@print{} aardvark +@print{} alpo-net +@dots{} +@end group +@end example + +@item print @var{items} >> @var{output-file} +This type of redirection prints the items into the pre-existing output file +@var{output-file}. The difference between this and the +single-@samp{>} redirection is that the old contents (if any) of +@var{output-file} are not erased. Instead, the @code{awk} output is +appended to the file. +If @var{output-file} does not exist, then it is created. 
+ +@cindex pipes for output +@cindex output, piping +@item print @var{items} | @var{command} +It is also possible to send output to another program through a pipe +instead of into a +file. This type of redirection opens a pipe to @var{command} and writes +the values of @var{items} through this pipe, to another process created +to execute @var{command}. + +The redirection argument @var{command} is actually an @code{awk} +expression. Its value is converted to a string, whose contents give the +shell command to be run. + +For example, this produces two files, one unsorted list of BBS names +and one list sorted in reverse alphabetical order: + +@example +awk '@{ print $1 > "names.unsorted" + command = "sort -r > names.sorted" + print $1 | command @}' BBS-list +@end example + +Here the unsorted list is written with an ordinary redirection while +the sorted list is written by piping through the @code{sort} utility. + +This example uses redirection to mail a message to a mailing +list @samp{bug-system}. This might be useful when trouble is encountered +in an @code{awk} script run periodically for system maintenance. + +@example +report = "mail bug-system" +print "Awk script failed:", $0 | report +m = ("at record number " FNR " of " FILENAME) +print m | report +close(report) +@end example + +The message is built using string concatenation and saved in the variable +@code{m}. It is then sent down the pipeline to the @code{mail} program. + +We call the @code{close} function here because it's a good idea to close +the pipe as soon as all the intended output has been sent to it. +@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}, +for more information +on this. This example also illustrates the use of a variable to represent +a @var{file} or @var{command}: it is not necessary to always +use a string constant. Using a variable is generally a good idea, +since @code{awk} requires you to spell the string value identically +every time. 
+@end table + +Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system +to open a file or pipe only if the particular @var{file} or @var{command} +you've specified has not already been written to by your program, or if +it has been closed since it was last written to. + +@cindex differences between @code{gawk} and @code{awk} +@cindex limitations +@cindex implementation limits +@iftex +As mentioned earlier +(@pxref{Getline Summary, , Summary of @code{getline} Variants}), +many +@end iftex +@ifinfo +Many +@end ifinfo +@code{awk} implementations limit the number of pipelines an @code{awk} +program may have open to just one! In @code{gawk}, there is no such limit. +You can open as many pipelines as the underlying operating system will +permit. + +@node Special Files, Close Files And Pipes , Redirection, Printing +@section Special File Names in @code{gawk} +@cindex standard input +@cindex standard output +@cindex standard error output +@cindex file descriptors + +Running programs conventionally have three input and output streams +already available to them for reading and writing. These are known as +the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error +output}. These streams are, by default, connected to your terminal, but +they are often redirected with the shell, via the @samp{<}, @samp{<<}, +@samp{>}, @samp{>>}, @samp{>&} and @samp{|} operators. Standard error +is typically used for writing error messages; the reason we have two separate +streams, standard output and standard error, is so that they can be +redirected separately. + +@cindex differences between @code{gawk} and @code{awk} +In other implementations of @code{awk}, the only way to write an error +message to standard error in an @code{awk} program is as follows: + +@example +print "Serious error detected!" 
| "cat 1>&2" +@end example + +@noindent +This works by opening a pipeline to a shell command which can access the +standard error stream which it inherits from the @code{awk} process. +This is far from elegant, and is also inefficient, since it requires a +separate process. So people writing @code{awk} programs often +neglect to do this. Instead, they send the error messages to the +terminal, like this: + +@example +@group +print "Serious error detected!" > "/dev/tty" +@end group +@end example + +@noindent +This usually has the same effect, but not always: although the +standard error stream is usually the terminal, it can be redirected, and +when that happens, writing to the terminal is not correct. In fact, if +@code{awk} is run from a background job, it may not have a terminal at all. +Then opening @file{/dev/tty} will fail. + +@code{gawk} provides special file names for accessing the three standard +streams. When you redirect input or output in @code{gawk}, if the file name +matches one of these special names, then @code{gawk} directly uses the +stream it stands for. + +@cindex @file{/dev/stdin} +@cindex @file{/dev/stdout} +@cindex @file{/dev/stderr} +@cindex @file{/dev/fd} +@c @cartouche +@table @file +@item /dev/stdin +The standard input (file descriptor 0). + +@item /dev/stdout +The standard output (file descriptor 1). + +@item /dev/stderr +The standard error output (file descriptor 2). + +@item /dev/fd/@var{N} +The file associated with file descriptor @var{N}. Such a file must have +been opened by the program initiating the @code{awk} execution (typically +the shell). Unless you take special pains in the shell from which +you invoke @code{gawk}, only descriptors 0, 1 and 2 are available. +@end table +@c @end cartouche + +The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr} +are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2}, +respectively, but they are more self-explanatory. 
+ +The proper way to write an error message in a @code{gawk} program +is to use @file{/dev/stderr}, like this: + +@example +print "Serious error detected!" > "/dev/stderr" +@end example + +@code{gawk} also provides special file names that give access to information +about the running @code{gawk} process. Each of these ``files'' provides +a single record of information. To read them more than once, you must +first close them with the @code{close} function +(@pxref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}). +The filenames are: + +@cindex process information +@cindex @file{/dev/pid} +@cindex @file{/dev/pgrpid} +@cindex @file{/dev/ppid} +@cindex @file{/dev/user} +@c @cartouche +@table @file +@item /dev/pid +Reading this file returns the process ID of the current process, +in decimal, terminated with a newline. + +@item /dev/ppid +Reading this file returns the parent process ID of the current process, +in decimal, terminated with a newline. + +@item /dev/pgrpid +Reading this file returns the process group ID of the current process, +in decimal, terminated with a newline. + +@item /dev/user +Reading this file returns a single record terminated with a newline. +The fields are separated with spaces. The fields represent the +following information: + +@table @code +@item $1 +The return value of the @code{getuid} system call +(the real user ID number). + +@item $2 +The return value of the @code{geteuid} system call +(the effective user ID number). + +@item $3 +The return value of the @code{getgid} system call +(the real group ID number). + +@item $4 +The return value of the @code{getegid} system call +(the effective group ID number). +@end table + +If there are any additional fields, they are the group IDs returned by +@code{getgroups} system call. +(Multiple groups may not be supported on all systems.) 
+@end table +@c @end cartouche + +These special file names may be used on the command line as data +files, as well as for I/O redirections within an @code{awk} program. +They may not be used as source files with the @samp{-f} option. + +Recognition of these special file names is disabled if @code{gawk} is in +compatibility mode (@pxref{Options, ,Command Line Options}). + +@strong{Caution}: Unless your system actually has a @file{/dev/fd} directory +(or any of the other above listed special files), +the interpretation of these file names is done by @code{gawk} itself. +For example, using @samp{/dev/fd/4} for output will actually write on +file descriptor 4, and not on a new file descriptor that was @code{dup}'ed +from file descriptor 4. Most of the time this does not matter; however, it +is important to @emph{not} close any of the files related to file descriptors +0, 1, and 2. If you do close one of these files, unpredictable behavior +will result. + +The special files that provide process-related information may disappear +in a future version of @code{gawk}. +@xref{Future Extensions, ,Probable Future Extensions}. + +@node Close Files And Pipes, , Special Files, Printing +@section Closing Input and Output Files and Pipes +@cindex closing input files and pipes +@cindex closing output files and pipes +@findex close + +If the same file name or the same shell command is used with +@code{getline} +(@pxref{Getline, ,Explicit Input with @code{getline}}) +more than once during the execution of an @code{awk} +program, the file is opened (or the command is executed) only the first time. +At that time, the first record of input is read from that file or command. +The next time the same file or command is used in @code{getline}, another +record is read from it, and so on. 
+ +Similarly, when a file or pipe is opened for output, the file name or command +associated with +it is remembered by @code{awk} and subsequent writes to the same file or +command are appended to the previous writes. The file or pipe stays +open until @code{awk} exits. + +This implies that if you want to start reading the same file again from +the beginning, or if you want to rerun a shell command (rather than +reading more output from the command), you must take special steps. +What you must do is use the @code{close} function, as follows: + +@example +close(@var{file}) +@end example + +@noindent +or + +@example +close(@var{command}) +@end example + +The argument @var{file} or @var{command} can be any expression. Its +value must @emph{exactly} match the string that was used to open the file or +start the command (spaces and other ``irrelevant'' characters +included). For example, if you open a pipe with this: + +@example +"sort -r names" | getline foo +@end example + +@noindent +then you must close it with this: + +@example +close("sort -r names") +@end example + +Once this function call is executed, the next @code{getline} from that +file or command, or the next @code{print} or @code{printf} to that +file or command, will reopen the file or rerun the command. + +Because the expression that you use to close a file or pipeline must +exactly match the expression used to open the file or run the command, +it is good practice to use a variable to store the file name or command. +The previous example would become + +@example +sortcom = "sort -r names" +sortcom | getline foo +@dots{} +close(sortcom) +@end example + +@noindent +This helps avoid hard-to-find typographical errors in your @code{awk} +programs. + +Here are some reasons why you might need to close an output file: + +@itemize @bullet +@item +To write a file and read it back later on in the same @code{awk} +program. 
Close the file when you are finished writing it; then +you can start reading it with @code{getline}. + +@item +To write numerous files, successively, in the same @code{awk} +program. If you don't close the files, eventually you may exceed a +system limit on the number of open files in one process. So close +each one when you are finished writing it. + +@item +To make a command finish. When you redirect output through a pipe, +the command reading the pipe normally continues to try to read input +as long as the pipe is open. Often this means the command cannot +really do its work until the pipe is closed. For example, if you +redirect output to the @code{mail} program, the message is not +actually sent until the pipe is closed. + +@item +To run the same program a second time, with the same arguments. +This is not the same thing as giving more input to the first run! + +For example, suppose you pipe output to the @code{mail} program. If you +output several lines redirected to this pipe without closing it, they make +a single message of several lines. By contrast, if you close the pipe +after each line of output, then each line makes a separate message. +@end itemize + +@vindex ERRNO +@cindex differences between @code{gawk} and @code{awk} +@code{close} returns a value of zero if the close succeeded. +Otherwise, the value will be non-zero. +In this case, @code{gawk} sets the variable @code{ERRNO} to a string +describing the error that occurred. + +@cindex differences between @code{gawk} and @code{awk} +@cindex portability issues +If you use more files than the system allows you to have open, +@code{gawk} will attempt to multiplex the available open files among +your data files. @code{gawk}'s ability to do this depends upon the +facilities of your operating system: it may not always work. It is +therefore both good practice and good portability advice to always +use @code{close} on your files when you are done with them. 
+ +@node Expressions, Patterns and Actions, Printing, Top +@chapter Expressions +@cindex expression + +Expressions are the basic building blocks of @code{awk} patterns +and actions. An expression evaluates to a value, which you can print, test, +store in a variable or pass to a function. Additionally, an expression +can assign a new value to a variable or a field, with an assignment operator. + +An expression can serve as a pattern or action statement on its own. +Most other kinds of +statements contain one or more expressions which specify data on which to +operate. As in other languages, expressions in @code{awk} include +variables, array references, constants, and function calls, as well as +combinations of these with various operators. + +@menu +* Constants:: String, numeric, and regexp constants. +* Using Constant Regexps:: When and how to use a regexp constant. +* Variables:: Variables give names to values for later use. +* Conversion:: The conversion of strings to numbers and vice + versa. +* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, + etc.) +* Concatenation:: Concatenating strings. +* Assignment Ops:: Changing the value of a variable or a field. +* Increment Ops:: Incrementing the numeric value of a variable. +* Truth Values:: What is ``true'' and what is ``false''. +* Typing and Comparison:: How variables acquire types, and how this + affects comparison of numbers and strings with + @samp{<}, etc. +* Boolean Ops:: Combining comparison expressions using boolean + operators @samp{||} (``or''), @samp{&&} + (``and'') and @samp{!} (``not''). +* Conditional Exp:: Conditional expressions select between two + subexpressions under control of a third + subexpression. +* Function Calls:: A function call is an expression. +* Precedence:: How various operators nest. 
+@end menu + +@node Constants, Using Constant Regexps, Expressions, Expressions +@section Constant Expressions +@cindex constants, types of +@cindex string constants + +The simplest type of expression is the @dfn{constant}, which always has +the same value. There are three types of constants: numeric constants, +string constants, and regular expression constants. + +@menu +* Scalar Constants:: Numeric and string constants. +* Regexp Constants:: Regular Expression constants. +@end menu + +@node Scalar Constants, Regexp Constants, Constants, Constants +@subsection Numeric and String Constants + +@cindex numeric constant +@cindex numeric value +A @dfn{numeric constant} stands for a number. This number can be an +integer, a decimal fraction, or a number in scientific (exponential) +notation.@footnote{The internal representation uses double-precision +floating point numbers. If you don't know what that means, then don't +worry about it.} Here are some examples of numeric constants, which all +have the same value: + +@example +105 +1.05e+2 +1050e-1 +@end example + +A string constant consists of a sequence of characters enclosed in +double-quote marks. For example: + +@example +"parrot" +@end example + +@noindent +@cindex differences between @code{gawk} and @code{awk} +represents the string whose contents are @samp{parrot}. Strings in +@code{gawk} can be of any length and they can contain any of the possible +eight-bit ASCII characters including ASCII NUL (character code zero). +Other @code{awk} +implementations may have difficulty with some character codes. + +@node Regexp Constants, , Scalar Constants, Constants +@subsection Regular Expression Constants + +@cindex @code{~} operator +@cindex @code{!~} operator +A regexp constant is a regular expression description enclosed in +slashes, such as @code{@w{/^beginning and end$/}}. 
Most regexps used in +@code{awk} programs are constant, but the @samp{~} and @samp{!~} +matching operators can also match computed or ``dynamic'' regexps +(which are just ordinary strings or variables that contain a regexp). + +@node Using Constant Regexps, Variables, Constants, Expressions +@section Using Regular Expression Constants + +When used on the right hand side of the @samp{~} or @samp{!~} +operators, a regexp constant merely stands for the regexp that is to be +matched. + +@cindex dark corner +Regexp constants (such as @code{/foo/}) may be used like simple expressions. +When a +regexp constant appears by itself, it has the same meaning as if it appeared +in a pattern, i.e.@: @samp{($0 ~ /foo/)} (d.c.) +(@pxref{Expression Patterns, ,Expressions as Patterns}). +This means that the two code segments, + +@example +if ($0 ~ /barfly/ || $0 ~ /camelot/) + print "found" +@end example + +@noindent +and + +@example +if (/barfly/ || /camelot/) + print "found" +@end example + +@noindent +are exactly equivalent. + +One rather bizarre consequence of this rule is that the following +boolean expression is valid, but does not do what the user probably +intended: + +@example +# note that /foo/ is on the left of the ~ +if (/foo/ ~ $1) print "found foo" +@end example + +@noindent +This code is ``obviously'' testing @code{$1} for a match against the regexp +@code{/foo/}. But in fact, the expression @samp{/foo/ ~ $1} actually means +@samp{($0 ~ /foo/) ~ $1}. In other words, first match the input record +against the regexp @code{/foo/}. The result will be either zero or one, +depending upon the success or failure of the match. Then match that result +against the first field in the record. + +Since it is unlikely that you would ever really wish to make this kind of +test, @code{gawk} will issue a warning when it sees this construct in +a program. 
+ +Another consequence of this rule is that the assignment statement + +@example +matches = /foo/ +@end example + +@noindent +will assign either zero or one to the variable @code{matches}, depending +upon the contents of the current input record. + +This feature of the language was never well documented until the +POSIX specification. + +@cindex differences between @code{gawk} and @code{awk} +@cindex dark corner +Constant regular expressions are also used as the first argument for +the @code{gensub}, @code{sub} and @code{gsub} functions, and as the +second argument of the @code{match} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +Modern implementations of @code{awk}, including @code{gawk}, allow +the third argument of @code{split} to be a regexp constant, while some +older implementations do not (d.c.). + +This can lead to confusion when attempting to use regexp constants +as arguments to user defined functions +(@pxref{User-defined, , User-defined Functions}). +For example: + +@example +@group +function mysub(pat, repl, str, global) +@{ + if (global) + gsub(pat, repl, str) + else + sub(pat, repl, str) + return str +@} +@end group + +@group +@{ + @dots{} + text = "hi! hi yourself!" + mysub(/hi/, "howdy", text, 1) + @dots{} +@} +@end group +@end example + +In this example, the programmer wishes to pass a regexp constant to the +user-defined function @code{mysub}, which will in turn pass it on to +either @code{sub} or @code{gsub}. However, what really happens is that +the @code{pat} parameter will be either one or zero, depending upon whether +or not @code{$0} matches @code{/hi/}. + +As it is unlikely that you would ever really wish to pass a truth value +in this way, @code{gawk} will issue a warning when it sees a regexp +constant used as a parameter to a user-defined function. 
+ +@node Variables, Conversion, Using Constant Regexps, Expressions +@section Variables + +Variables are ways of storing values at one point in your program for +use later in another part of your program. You can manipulate them +entirely within your program text, and you can also assign values to +them on the @code{awk} command line. + +@menu +* Using Variables:: Using variables in your programs. +* Assignment Options:: Setting variables on the command line and a + summary of command line syntax. This is an + advanced method of input. +@end menu + +@node Using Variables, Assignment Options, Variables, Variables +@subsection Using Variables in a Program + +@cindex variables, user-defined +@cindex user-defined variables +Variables let you give names to values and refer to them later. You have +already seen variables in many of the examples. The name of a variable +must be a sequence of letters, digits and underscores, but it may not begin +with a digit. Case is significant in variable names; @code{a} and @code{A} +are distinct variables. + +A variable name is a valid expression by itself; it represents the +variable's current value. Variables are given new values with +@dfn{assignment operators}, @dfn{increment operators} and +@dfn{decrement operators}. +@xref{Assignment Ops, ,Assignment Expressions}. + +A few variables have special built-in meanings, such as @code{FS}, the +field separator, and @code{NF}, the number of fields in the current +input record. @xref{Built-in Variables}, for a list of them. These +built-in variables can be used and assigned just like all other +variables, but their values are also used or changed automatically by +@code{awk}. All built-in variables names are entirely upper-case. + +Variables in @code{awk} can be assigned either numeric or string +values. By default, variables are initialized to the empty string, which +is zero if converted to a number. 
There is no need to +``initialize'' each variable explicitly in @code{awk}, +the way you would in C and in most other traditional languages. + +@node Assignment Options, , Using Variables, Variables +@subsection Assigning Variables on the Command Line + +You can set any @code{awk} variable by including a @dfn{variable assignment} +among the arguments on the command line when you invoke @code{awk} +(@pxref{Other Arguments, ,Other Command Line Arguments}). Such an assignment has +this form: + +@example +@var{variable}=@var{text} +@end example + +@noindent +With it, you can set a variable either at the beginning of the +@code{awk} run or in between input files. + +If you precede the assignment with the @samp{-v} option, like this: + +@example +-v @var{variable}=@var{text} +@end example + +@noindent +then the variable is set at the very beginning, before even the +@code{BEGIN} rules are run. The @samp{-v} option and its assignment +must precede all the file name arguments, as well as the program text. +(@xref{Options, ,Command Line Options}, for more information about +the @samp{-v} option.) + +Otherwise, the variable assignment is performed at a time determined by +its position among the input file arguments: after the processing of the +preceding input file argument. For example: + +@example +awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list +@end example + +@noindent +prints the value of field number @code{n} for all input records. Before +the first file is read, the command line sets the variable @code{n} +equal to four. This causes the fourth field to be printed in lines from +the file @file{inventory-shipped}. After the first file has finished, +but before the second file is started, @code{n} is set to two, so that the +second field is printed in lines from @file{BBS-list}. 
+ +@example +@group +$ awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list +@print{} 15 +@print{} 24 +@dots{} +@print{} 555-5553 +@print{} 555-3412 +@dots{} +@end group +@end example + +Command line arguments are made available for explicit examination by +the @code{awk} program in an array named @code{ARGV} +(@pxref{ARGC and ARGV, ,Using @code{ARGC} and @code{ARGV}}). + +@cindex dark corner +@code{awk} processes the values of command line assignments for escape +sequences (d.c.) (@pxref{Escape Sequences}). + +@node Conversion, Arithmetic Ops, Variables, Expressions +@section Conversion of Strings and Numbers + +@cindex conversion of strings and numbers +Strings are converted to numbers, and numbers to strings, if the context +of the @code{awk} program demands it. For example, if the value of +either @code{foo} or @code{bar} in the expression @samp{foo + bar} +happens to be a string, it is converted to a number before the addition +is performed. If numeric values appear in string concatenation, they +are converted to strings. Consider this: + +@example +two = 2; three = 3 +print (two three) + 4 +@end example + +@noindent +This prints the (numeric) value 27. The numeric values of +the variables @code{two} and @code{three} are converted to strings and +concatenated together, and the resulting string is converted back to the +number 23, to which four is then added. + +@cindex null string +@cindex empty string +@cindex type conversion +If, for some reason, you need to force a number to be converted to a +string, concatenate the empty string, @code{""}, with that number. +To force a string to be converted to a number, add zero to that string. + +A string is converted to a number by interpreting any numeric prefix +of the string as numerals: +@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"} +has a numeric value of 25. +Strings that can't be interpreted as valid numbers are converted to +zero. 
+ +@vindex CONVFMT +The exact manner in which numbers are converted into strings is controlled +by the @code{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}). +Numbers are converted using the @code{sprintf} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}) +with @code{CONVFMT} as the format +specifier. + +@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with +at least six significant digits. For some applications you will want to +change it to specify more precision. Double precision on most modern +machines gives you 16 or 17 decimal digits of precision. + +Strange results can happen if you set @code{CONVFMT} to a string that doesn't +tell @code{sprintf} how to format floating point numbers in a useful way. +For example, if you forget the @samp{%} in the format, all numbers will be +converted to the same constant string. + +@cindex dark corner +As a special case, if a number is an integer, then the result of converting +it to a string is @emph{always} an integer, no matter what the value of +@code{CONVFMT} may be. Given the following code fragment: + +@example +CONVFMT = "%2.2f" +a = 12 +b = a "" +@end example + +@noindent +@code{b} has the value @code{"12"}, not @code{"12.00"} (d.c.). + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@vindex OFMT +Prior to the POSIX standard, @code{awk} specified that the value +of @code{OFMT} was used for converting numbers to strings. @code{OFMT} +specifies the output format to use when printing numbers with @code{print}. +@code{CONVFMT} was introduced in order to separate the semantics of +conversion from the semantics of printing. Both @code{CONVFMT} and +@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority +of cases, old @code{awk} programs will not change their behavior. 
+ +However, this use of @code{OFMT} is something to keep in mind if you must +port your program to other implementations of @code{awk}; we recommend +that instead of changing your programs, you just port @code{gawk} itself! +@xref{Print, ,The @code{print} Statement}, +for more information on the @code{print} statement. + +@node Arithmetic Ops, Concatenation, Conversion, Expressions +@section Arithmetic Operators +@cindex arithmetic operators +@cindex operators, arithmetic +@cindex addition +@cindex subtraction +@cindex multiplication +@cindex division +@cindex remainder +@cindex quotient +@cindex exponentiation + +The @code{awk} language uses the common arithmetic operators when +evaluating expressions. All of these arithmetic operators follow normal +precedence rules, and work as you would expect them to. + +Here is a file @file{grades} containing a list of student names and +three test scores per student (it's a small class): + +@example +Pat 100 97 58 +Sandy 84 72 93 +Chris 72 92 89 +@end example + +@noindent +This program takes the file @file{grades}, and prints the average +of the scores. + +@example +$ awk '@{ sum = $2 + $3 + $4 ; avg = sum / 3 +> print $1, avg @}' grades +@print{} Pat 85 +@print{} Sandy 83 +@print{} Chris 84.3333 +@end example + +This table lists the arithmetic operators in @code{awk}, in order from +highest precedence to lowest: + +@c @cartouche +@table @code +@item - @var{x} +Negation. + +@item + @var{x} +Unary plus. The expression is converted to a number. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@item @var{x} ^ @var{y} +@itemx @var{x} ** @var{y} +Exponentiation: @var{x} raised to the @var{y} power. @samp{2 ^ 3} has +the value eight. The character sequence @samp{**} is equivalent to +@samp{^}. (The POSIX standard only specifies the use of @samp{^} +for exponentiation.) + +@item @var{x} * @var{y} +Multiplication. + +@item @var{x} / @var{y} +Division. 
Since all numbers in @code{awk} are +real numbers, the result is not rounded to an integer: @samp{3 / 4} +has the value 0.75. + +@item @var{x} % @var{y} +@cindex differences between @code{gawk} and @code{awk} +Remainder. The quotient is rounded toward zero to an integer, +multiplied by @var{y} and this result is subtracted from @var{x}. +This operation is sometimes known as ``trunc-mod.'' The following +relation always holds: + +@example +b * int(a / b) + (a % b) == a +@end example + +One possibly undesirable effect of this definition of remainder is that +@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus, + +@example +-17 % 8 = -1 +@end example + +In other @code{awk} implementations, the signedness of the remainder +may be machine dependent. +@c !!! what does posix say? + +@item @var{x} + @var{y} +Addition. + +@item @var{x} - @var{y} +Subtraction. +@end table +@c @end cartouche + +For maximum portability, do not use the @samp{**} operator. + +Unary plus and minus have the same precedence, +the multiplication operators all have the same precedence, and +addition and subtraction have the same precedence. + +@node Concatenation, Assignment Ops, Arithmetic Ops, Expressions +@section String Concatenation +@cindex Kernighan, Brian +@display +@i{It seemed like a good idea at the time.} +Brian Kernighan +@end display +@sp 1 + +@cindex string operators +@cindex operators, string +@cindex concatenation +There is only one string operation: concatenation. It does not have a +specific operator to represent it. Instead, concatenation is performed by +writing expressions next to one another, with no operator. For example: + +@example +@group +$ awk '@{ print "Field number one: " $1 @}' BBS-list +@print{} Field number one: aardvark +@print{} Field number one: alpo-net +@dots{} +@end group +@end example + +Without the space in the string constant after the @samp{:}, the line +would run together. 
For example: + +@example +@group +$ awk '@{ print "Field number one:" $1 @}' BBS-list +@print{} Field number one:aardvark +@print{} Field number one:alpo-net +@dots{} +@end group +@end example + +Since string concatenation does not have an explicit operator, it is +often necessary to insure that it happens where you want it to by +using parentheses to enclose +the items to be concatenated. For example, the +following code fragment does not concatenate @code{file} and @code{name} +as you might expect: + +@example +@group +file = "file" +name = "name" +print "something meaningful" > file name +@end group +@end example + +@noindent +It is necessary to use the following: + +@example +print "something meaningful" > (file name) +@end example + +We recommend that you use parentheses around concatenation in all but the +most common contexts (such as on the right-hand side of @samp{=}). + +@node Assignment Ops, Increment Ops, Concatenation, Expressions +@section Assignment Expressions +@cindex assignment operators +@cindex operators, assignment +@cindex expression, assignment + +An @dfn{assignment} is an expression that stores a new value into a +variable. For example, let's assign the value one to the variable +@code{z}: + +@example +z = 1 +@end example + +After this expression is executed, the variable @code{z} has the value one. +Whatever old value @code{z} had before the assignment is forgotten. + +Assignments can store string values also. For example, this would store +the value @code{"this food is good"} in the variable @code{message}: + +@example +thing = "food" +predicate = "good" +message = "this " thing " is " predicate +@end example + +@noindent +(This also illustrates string concatenation.) + +The @samp{=} sign is called an @dfn{assignment operator}. It is the +simplest assignment operator because the value of the right-hand +operand is stored unchanged. 
+ +@cindex side effect +Most operators (addition, concatenation, and so on) have no effect +except to compute a value. If you ignore the value, you might as well +not use the operator. An assignment operator is different; it does +produce a value, but even if you ignore the value, the assignment still +makes itself felt through the alteration of the variable. We call this +a @dfn{side effect}. + +@cindex lvalue +@cindex rvalue +The left-hand operand of an assignment need not be a variable +(@pxref{Variables}); it can also be a field +(@pxref{Changing Fields, ,Changing the Contents of a Field}) or +an array element (@pxref{Arrays, ,Arrays in @code{awk}}). +These are all called @dfn{lvalues}, +which means they can appear on the left-hand side of an assignment operator. +The right-hand operand may be any expression; it produces the new value +which the assignment stores in the specified variable, field or array +element. (Such values are called @dfn{rvalues}). + +@cindex types of variables +It is important to note that variables do @emph{not} have permanent types. +The type of a variable is simply the type of whatever value it happens +to hold at the moment. In the following program fragment, the variable +@code{foo} has a numeric value at first, and a string value later on: + +@example +@group +foo = 1 +print foo +foo = "bar" +print foo +@end group +@end example + +@noindent +When the second assignment gives @code{foo} a string value, the fact that +it previously had a numeric value is forgotten. + +String values that do not begin with a digit have a numeric value of +zero. After executing this code, the value of @code{foo} is five: + +@example +foo = "a string" +foo = foo + 5 +@end example + +@noindent +(Note that using a variable as a number and then later as a string can +be confusing and is poor programming style. The above examples illustrate how +@code{awk} works, @emph{not} how you should write your own programs!) 
+ +An assignment is an expression, so it has a value: the same value that +is assigned. Thus, @samp{z = 1} as an expression has the value one. +One consequence of this is that you can write multiple assignments together: + +@example +x = y = z = 0 +@end example + +@noindent +stores the value zero in all three variables. It does this because the +value of @samp{z = 0}, which is zero, is stored into @code{y}, and then +the value of @samp{y = z = 0}, which is zero, is stored into @code{x}. + +You can use an assignment anywhere an expression is called for. For +example, it is valid to write @samp{x != (y = 1)} to set @code{y} to one +and then test whether @code{x} equals one. But this style tends to make +programs hard to read; except in a one-shot program, you should +not use such nesting of assignments. + +Aside from @samp{=}, there are several other assignment operators that +do arithmetic with the old value of the variable. For example, the +operator @samp{+=} computes a new value by adding the right-hand value +to the old value of the variable. Thus, the following assignment adds +five to the value of @code{foo}: + +@example +foo += 5 +@end example + +@noindent +This is equivalent to the following: + +@example +foo = foo + 5 +@end example + +@noindent +Use whichever one makes the meaning of your program clearer. + +There are situations where using @samp{+=} (or any assignment operator) +is @emph{not} the same as simply repeating the left-hand operand in the +right-hand expression. For example: + +@cindex Rankin, Pat +@example +@group +# Thanks to Pat Rankin for this example +BEGIN @{ + foo[rand()] += 5 + for (x in foo) + print x, foo[x] + + bar[rand()] = bar[rand()] + 5 + for (x in bar) + print x, bar[x] +@} +@end group +@end example + +@noindent +The indices of @code{bar} are guaranteed to be different, because +@code{rand} will return different values each time it is called. +(Arrays and the @code{rand} function haven't been covered yet. 
+@xref{Arrays, ,Arrays in @code{awk}}, +and see @ref{Numeric Functions, ,Numeric Built-in Functions}, for more information). +This example illustrates an important fact about the assignment +operators: the left-hand expression is only evaluated @emph{once}. + +It is also up to the implementation as to which expression is evaluated +first, the left-hand one or the right-hand one. +Consider this example: + +@example +i = 1 +a[i += 2] = i + 1 +@end example + +@noindent +The value of @code{a[3]} could be either two or four. + +Here is a table of the arithmetic assignment operators. In each +case, the right-hand operand is an expression whose value is converted +to a number. + +@c @cartouche +@table @code +@item @var{lvalue} += @var{increment} +Adds @var{increment} to the value of @var{lvalue} to make the new value +of @var{lvalue}. + +@item @var{lvalue} -= @var{decrement} +Subtracts @var{decrement} from the value of @var{lvalue}. + +@item @var{lvalue} *= @var{coefficient} +Multiplies the value of @var{lvalue} by @var{coefficient}. + +@item @var{lvalue} /= @var{divisor} +Divides the value of @var{lvalue} by @var{divisor}. + +@item @var{lvalue} %= @var{modulus} +Sets @var{lvalue} to its remainder by @var{modulus}. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@item @var{lvalue} ^= @var{power} +@itemx @var{lvalue} **= @var{power} +Raises @var{lvalue} to the power @var{power}. +(Only the @samp{^=} operator is specified by POSIX.) +@end table +@c @end cartouche + +For maximum portability, do not use the @samp{**=} operator. + +@node Increment Ops, Truth Values, Assignment Ops, Expressions +@section Increment and Decrement Operators + +@cindex increment operators +@cindex operators, increment +@dfn{Increment} and @dfn{decrement operators} increase or decrease the value of +a variable by one. 
You could do the same thing with an assignment operator, so +the increment operators add no power to the @code{awk} language; but they +are convenient abbreviations for very common operations. + +The operator to add one is written @samp{++}. It can be used to increment +a variable either before or after taking its value. + +To pre-increment a variable @var{v}, write @samp{++@var{v}}. This adds +one to the value of @var{v} and that new value is also the value of this +expression. The assignment expression @samp{@var{v} += 1} is completely +equivalent. + +Writing the @samp{++} after the variable specifies post-increment. This +increments the variable value just the same; the difference is that the +value of the increment expression itself is the variable's @emph{old} +value. Thus, if @code{foo} has the value four, then the expression @samp{foo++} +has the value four, but it changes the value of @code{foo} to five. + +The post-increment @samp{foo++} is nearly equivalent to writing @samp{(foo ++= 1) - 1}. It is not perfectly equivalent because all numbers in +@code{awk} are floating point: in floating point, @samp{foo + 1 - 1} does +not necessarily equal @code{foo}. But the difference is minute as +long as you stick to numbers that are fairly small (less than 10e12). + +Any lvalue can be incremented. Fields and array elements are incremented +just like variables. (Use @samp{$(i++)} when you wish to do a field reference +and a variable increment at the same time. The parentheses are necessary +because of the precedence of the field reference operator, @samp{$}.) + +@cindex decrement operators +@cindex operators, decrement +The decrement operator @samp{--} works just like @samp{++} except that +it subtracts one instead of adding. Like @samp{++}, it can be used before +the lvalue to pre-decrement or after it to post-decrement. + +Here is a summary of increment and decrement expressions. 
+ +@c @cartouche +@table @code +@item ++@var{lvalue} +This expression increments @var{lvalue} and the new value becomes the +value of the expression. + +@item @var{lvalue}++ +This expression increments @var{lvalue}, but +the value of the expression is the @emph{old} value of @var{lvalue}. + +@item --@var{lvalue} +Like @samp{++@var{lvalue}}, but instead of adding, it subtracts. It +decrements @var{lvalue} and delivers the value that results. + +@item @var{lvalue}-- +Like @samp{@var{lvalue}++}, but instead of adding, it subtracts. It +decrements @var{lvalue}. The value of the expression is the @emph{old} +value of @var{lvalue}. +@end table +@c @end cartouche + +@node Truth Values, Typing and Comparison, Increment Ops, Expressions +@section True and False in @code{awk} +@cindex truth values +@cindex logical true +@cindex logical false + +Many programming languages have a special representation for the concepts +of ``true'' and ``false.'' Such languages usually use the special +constants @code{true} and @code{false}, or perhaps their upper-case +equivalents. + +@cindex null string +@cindex empty string +@code{awk} is different. It borrows a very simple concept of true and +false from C. In @code{awk}, any non-zero numeric value, @emph{or} any +non-empty string value is true. Any other value (zero or the null +string, @code{""}) is false. The following program will print @samp{A strange +truth value} three times: + +@example +@group +BEGIN @{ + if (3.1415927) + print "A strange truth value" + if ("Four Score And Seven Years Ago") + print "A strange truth value" + if (j = 57) + print "A strange truth value" +@} +@end group +@end example + +@cindex dark corner +There is a surprising consequence of the ``non-zero or non-null'' rule: +The string constant @code{"0"} is actually true, since it is non-null (d.c.). 
+ +@node Typing and Comparison, Boolean Ops, Truth Values, Expressions +@section Variable Typing and Comparison Expressions +@cindex comparison expressions +@cindex expression, comparison +@cindex expression, matching +@cindex relational operators +@cindex operators, relational +@cindex regexp match/non-match operators +@cindex variable typing +@cindex types of variables +@c 2e: consider splitting this section into subsections +@display +@i{The Guide is definitive. Reality is frequently inaccurate.} +The Hitchhiker's Guide to the Galaxy +@end display +@sp 1 + +Unlike other programming languages, @code{awk} variables do not have a +fixed type. Instead, they can be either a number or a string, depending +upon the value that is assigned to them. + +@cindex numeric string +The 1992 POSIX standard introduced +the concept of a @dfn{numeric string}, which is simply a string that looks +like a number, for example, @code{@w{" +2"}}. This concept is used +for determining the type of a variable. + +The type of the variable is important, since the types of two variables +determine how they are compared. + +In @code{gawk}, variable typing follows these rules. + +@enumerate 1 +@item +A numeric literal or the result of a numeric operation has the @var{numeric} +attribute. + +@item +A string literal or the result of a string operation has the @var{string} +attribute. + +@item +Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements, +@code{ENVIRON} elements and the +elements of an array created by @code{split} that are numeric strings +have the @var{strnum} attribute. Otherwise, they have the @var{string} +attribute. +Uninitialized variables also have the @var{strnum} attribute. + +@item +Attributes propagate across assignments, but are not changed by +any use. +@c (Although a use may cause the entity to acquire an additional +@c value such that it has both a numeric and string value -- this leaves the +@c attribute unchanged.) 
+@c This is important but not relevant +@end enumerate + +The last rule is particularly important. In the following program, +@code{a} has numeric type, even though it is later used in a string +operation. + +@example +BEGIN @{ + a = 12.345 + b = a " is a cute number" + print b +@} +@end example + +When two operands are compared, either string comparison or numeric comparison +may be used, depending on the attributes of the operands, according to the +following, symmetric, matrix: + +@c thanks to Karl Berry, kb@cs.umb.edu, for major help with TeX tables +@tex +\centerline{ +\vbox{\bigskip % space above the table (about 1 linespace) +% Because we have vertical rules, we can't let TeX insert interline space +% in its usual way. +\offinterlineskip +% +% Define the table template. & separates columns, and \cr ends the +% template (and each row). # is replaced by the text of that entry on +% each row. The template for the first column breaks down like this: +% \strut -- a way to make each line have the height and depth +% of a normal line of type, since we turned off interline spacing. +% \hfil -- infinite glue; has the effect of right-justifying in this case. +% # -- replaced by the text (for instance, `STRNUM', in the last row). +% \quad -- about the width of an `M'. Just separates the columns. +% +% The second column (\vrule#) is what generates the vertical rule that +% spans table rows. +% +% The doubled && before the next entry means `repeat the following +% template as many times as necessary on each line' -- in our case, twice. +% +% The template itself, \quad#\hfil, left-justifies with a little space before. +% +\halign{\strut\hfil#\quad&\vrule#&&\quad#\hfil\cr + &&STRING &NUMERIC &STRNUM\cr +% The \omit tells TeX to skip inserting the template for this column on +% this particular row. In this case, we only want a little extra space +% to separate the heading row from the rule below it. the depth 2pt -- +% `\vrule depth 2pt' is that little space. 
+\omit &depth 2pt\cr +% This is the horizontal rule below the heading. Since it has nothing to +% do with the columns of the table, we use \noalign to get it in there. +\noalign{\hrule} +% Like above, this time a little more space. +\omit &depth 4pt\cr +% The remaining rows have nothing special about them. +STRING &&string &string &string\cr +NUMERIC &&string &numeric &numeric\cr +STRNUM &&string &numeric &numeric\cr +}}} +@end tex +@ifinfo +@display + +---------------------------------------------- + | STRING NUMERIC STRNUM +--------+---------------------------------------------- + | +STRING | string string string + | +NUMERIC | string numeric numeric + | +STRNUM | string numeric numeric +--------+---------------------------------------------- +@end display +@end ifinfo + +The basic idea is that user input that looks numeric, and @emph{only} +user input, should be treated as numeric, even though it is actually +made of characters, and is therefore also a string. + +@dfn{Comparison expressions} compare strings or numbers for +relationships such as equality. They are written using @dfn{relational +operators}, which are a superset of those in C. Here is a table of +them: + +@cindex relational operators +@cindex operators, relational +@cindex @code{<} operator +@cindex @code{<=} operator +@cindex @code{>} operator +@cindex @code{>=} operator +@cindex @code{==} operator +@cindex @code{!=} operator +@cindex @code{~} operator +@cindex @code{!~} operator +@cindex @code{in} operator +@c @cartouche +@table @code +@item @var{x} < @var{y} +True if @var{x} is less than @var{y}. + +@item @var{x} <= @var{y} +True if @var{x} is less than or equal to @var{y}. + +@item @var{x} > @var{y} +True if @var{x} is greater than @var{y}. + +@item @var{x} >= @var{y} +True if @var{x} is greater than or equal to @var{y}. + +@item @var{x} == @var{y} +True if @var{x} is equal to @var{y}. + +@item @var{x} != @var{y} +True if @var{x} is not equal to @var{y}. 
+ +@item @var{x} ~ @var{y} +True if the string @var{x} matches the regexp denoted by @var{y}. + +@item @var{x} !~ @var{y} +True if the string @var{x} does not match the regexp denoted by @var{y}. + +@item @var{subscript} in @var{array} +True if the array @var{array} has an element with the subscript @var{subscript}. +@end table +@c @end cartouche + +Comparison expressions have the value one if true and zero if false. + +When comparing operands of mixed types, numeric operands are converted +to strings using the value of @code{CONVFMT} +(@pxref{Conversion, ,Conversion of Strings and Numbers}). + +Strings are compared +by comparing the first character of each, then the second character of each, +and so on. Thus @code{"10"} is less than @code{"9"}. If there are two +strings where one is a prefix of the other, the shorter string is less than +the longer one. Thus @code{"abc"} is less than @code{"abcd"}. + +@cindex common mistakes +@cindex mistakes, common +@cindex errors, common +It is very easy to accidentally mistype the @samp{==} operator, and +leave off one of the @samp{=}s. The result is still valid @code{awk} +code, but the program will not do what you mean: + +@example +if (a = b) # oops! should be a == b + @dots{} +else + @dots{} +@end example + +@noindent +Unless @code{b} happens to be zero or the null string, the @code{if} +part of the test will always succeed. Because the operators are +so similar, this kind of error is very difficult to spot when +scanning the source code. + +Here are some sample expressions, how @code{gawk} compares them, and what +the result of the comparison is. 
+ +@table @code +@item 1.5 <= 2.0 +numeric comparison (true) + +@item "abc" >= "xyz" +string comparison (false) + +@item 1.5 != " +2" +string comparison (true) + +@item "1e2" < "3" +string comparison (true) + +@item a = 2; b = "2" +@itemx a == b +string comparison (true) + +@item a = 2; b = " +2" +@itemx a == b +string comparison (false) +@end table + +In this example, + +@example +@group +$ echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}' +@print{} false +@end group +@end example + +@noindent +the result is @samp{false} since both @code{$1} and @code{$2} are numeric +strings and thus both have the @var{strnum} attribute, +dictating a numeric comparison. + +The purpose of the comparison rules and the use of numeric strings is +to attempt to produce the behavior that is ``least surprising,'' while +still ``doing the right thing.'' + +@cindex comparisons, string vs. regexp +@cindex string comparison vs. regexp comparison +@cindex regexp comparison vs. string comparison +String comparisons and regular expression comparisons are very different. +For example, + +@example +x == "foo" +@end example + +@noindent +has the value of one, or is true, if the variable @code{x} +is precisely @samp{foo}. By contrast, + +@example +x ~ /foo/ +@end example + +@noindent +has the value one if @code{x} contains @samp{foo}, such as +@code{"Oh, what a fool am I!"}. + +The right hand operand of the @samp{~} and @samp{!~} operators may be +either a regexp constant (@code{/@dots{}/}), or an ordinary +expression, in which case the value of the expression as a string is used as a +dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}; also +@pxref{Computed Regexps, ,Using Dynamic Regexps}). + +@cindex regexp as expression +In recent implementations of @code{awk}, a constant regular +expression in slashes by itself is also an expression. 
The regexp +@code{/@var{regexp}/} is an abbreviation for this comparison expression: + +@example +$0 ~ /@var{regexp}/ +@end example + +One special place where @code{/foo/} is @emph{not} an abbreviation for +@samp{$0 ~ /foo/} is when it is the right-hand operand of @samp{~} or +@samp{!~}! +@xref{Using Constant Regexps, ,Using Regular Expression Constants}, +where this is discussed in more detail. + +@c This paragraph has been here since day 1, and has always bothered +@c me, especially since the expression doesn't really make a lot of +@c sense. So, just take it out. +@ignore +In some contexts it may be necessary to write parentheses around the +regexp to avoid confusing the @code{gawk} parser. For example, +@samp{(/x/ - /y/) > threshold} is not allowed, but @samp{((/x/) - (/y/)) +> threshold} parses properly. +@end ignore + +@node Boolean Ops, Conditional Exp, Typing and Comparison, Expressions +@section Boolean Expressions +@cindex expression, boolean +@cindex boolean expressions +@cindex operators, boolean +@cindex boolean operators +@cindex logical operations +@cindex operations, logical +@cindex short-circuit operators +@cindex operators, short-circuit +@cindex and operator +@cindex or operator +@cindex not operator +@cindex @code{&&} operator +@cindex @code{||} operator +@cindex @code{!} operator + +A @dfn{boolean expression} is a combination of comparison expressions or +matching expressions, using the boolean operators ``or'' +(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with +parentheses to control nesting. The truth value of the boolean expression is +computed by combining the truth values of the component expressions. +Boolean expressions are also referred to as @dfn{logical expressions}. +The terms are equivalent. + +Boolean expressions can be used wherever comparison and matching +expressions can be used. 
They can be used in @code{if}, @code{while}, +@code{do} and @code{for} statements +(@pxref{Statements, ,Control Statements in Actions}). +They have numeric values (one if true, zero if false), which come into play +if the result of the boolean expression is stored in a variable, or +used in arithmetic. + +In addition, every boolean expression is also a valid pattern, so +you can use one as a pattern to control the execution of rules. + +Here are descriptions of the three boolean operators, with examples. + +@c @cartouche +@table @code +@item @var{boolean1} && @var{boolean2} +True if both @var{boolean1} and @var{boolean2} are true. For example, +the following statement prints the current input record if it contains +both @samp{2400} and @samp{foo}. + +@example +if ($0 ~ /2400/ && $0 ~ /foo/) print +@end example + +The subexpression @var{boolean2} is evaluated only if @var{boolean1} +is true. This can make a difference when @var{boolean2} contains +expressions that have side effects: in the case of @samp{$0 ~ /foo/ && +($2 == bar++)}, the variable @code{bar} is not incremented if there is +no @samp{foo} in the record. + +@item @var{boolean1} || @var{boolean2} +True if at least one of @var{boolean1} or @var{boolean2} is true. +For example, the following statement prints all records in the input +that contain @emph{either} @samp{2400} or +@samp{foo}, or both. + +@example +if ($0 ~ /2400/ || $0 ~ /foo/) print +@end example + +The subexpression @var{boolean2} is evaluated only if @var{boolean1} +is false. This can make a difference when @var{boolean2} contains +expressions that have side effects. + +@item ! @var{boolean} +True if @var{boolean} is false. For example, the following program prints +all records in the input file @file{BBS-list} that do @emph{not} contain the +string @samp{foo}. + +@c A better example would be `if (! (subscript in array)) ...' but we +@c haven't done anything with arrays or `in' yet. Sigh. +@example +awk '@{ if (! 
($0 ~ /foo/)) print @}' BBS-list +@end example +@end table +@c @end cartouche + +The @samp{&&} and @samp{||} operators are called @dfn{short-circuit} +operators because of the way they work. Evaluation of the full expression +is ``short-circuited'' if the result can be determined part way through +its evaluation. + +@cindex line continuation +You can continue a statement that uses @samp{&&} or @samp{||} simply +by putting a newline after them. But you cannot put a newline in front +of either of these operators without using backslash continuation +(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}). + +The actual value of an expression using the @samp{!} operator will be +either one or zero, depending upon the truth value of the expression it +is applied to. + +The @samp{!} operator is often useful for changing the sense of a flag +variable from false to true and back again. For example, the following +program is one way to print lines in between special bracketing lines: + +@example +$1 == "START" @{ interested = ! interested @} +interested == 1 @{ print @} +$1 == "END" @{ interested = ! interested @} +@end example + +@noindent +The variable @code{interested}, like all @code{awk} variables, starts +out initialized to zero, which is also false. When a line is seen whose +first field is @samp{START}, the value of @code{interested} is toggled +to true, using @samp{!}. The next rule prints lines as long as +@code{interested} is true. When a line is seen whose first field is +@samp{END}, @code{interested} is toggled back to false. +@ignore +We should discuss using `next' in the two rules that toggle the +variable, to avoid printing the bracketing lines, but that's more +distraction than really needed. 
+@end ignore + +@node Conditional Exp, Function Calls, Boolean Ops, Expressions +@section Conditional Expressions +@cindex conditional expression +@cindex expression, conditional + +A @dfn{conditional expression} is a special kind of expression with +three operands. It allows you to use one expression's value to select +one of two other expressions. + +The conditional expression is the same as in the C language: + +@example +@var{selector} ? @var{if-true-exp} : @var{if-false-exp} +@end example + +@noindent +There are three subexpressions. The first, @var{selector}, is always +computed first. If it is ``true'' (not zero and not null) then +@var{if-true-exp} is computed next and its value becomes the value of +the whole expression. Otherwise, @var{if-false-exp} is computed next +and its value becomes the value of the whole expression. + +For example, this expression produces the absolute value of @code{x}: + +@example +x > 0 ? x : -x +@end example + +Each time the conditional expression is computed, exactly one of +@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored. +This is important when the expressions contain side effects. For example, +this conditional expression examines element @code{i} of either array +@code{a} or array @code{b}, and increments @code{i}. + +@example +x == y ? a[i++] : b[i++] +@end example + +@noindent +This is guaranteed to increment @code{i} exactly once, because each time +only one of the two increment expressions is executed, +and the other is not. +@xref{Arrays, ,Arrays in @code{awk}}, +for more information about arrays. + +@cindex differences between @code{gawk} and @code{awk} +@cindex line continuation +As a minor @code{gawk} extension, +you can continue a statement that uses @samp{?:} simply +by putting a newline after either character. +However, you cannot put a newline in front +of either character without using backslash continuation +(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}). 
+ +@node Function Calls, Precedence, Conditional Exp, Expressions +@section Function Calls +@cindex function call +@cindex calling a function + +A @dfn{function} is a name for a particular calculation. Because it has +a name, you can ask for it by name at any point in the program. For +example, the function @code{sqrt} computes the square root of a number. + +A fixed set of functions are @dfn{built-in}, which means they are +available in every @code{awk} program. The @code{sqrt} function is one +of these. @xref{Built-in, ,Built-in Functions}, for a list of built-in +functions and their descriptions. In addition, you can define your own +functions for use in your program. +@xref{User-defined, ,User-defined Functions}, for how to do this. + +@cindex arguments in function call +The way to use a function is with a @dfn{function call} expression, +which consists of the function name followed immediately by a list of +@dfn{arguments} in parentheses. The arguments are expressions which +provide the raw materials for the function's calculations. +When there is more than one argument, they are separated by commas. If +there are no arguments, write just @samp{()} after the function name. +Here are some examples: + +@example +sqrt(x^2 + y^2) @i{one argument} +atan2(y, x) @i{two arguments} +rand() @i{no arguments} +@end example + +@strong{Do not put any space between the function name and the +open-parenthesis!} A user-defined function name looks just like the name of +a variable, and space would make the expression look like concatenation +of a variable with an expression inside parentheses. Space before the +parenthesis is harmless with built-in functions, but it is best not to get +into the habit of using space to avoid mistakes with user-defined +functions. + +Each function expects a particular number of arguments. 
For example, the +@code{sqrt} function must be called with a single argument, the number +to take the square root of: + +@example +sqrt(@var{argument}) +@end example + +Some of the built-in functions allow you to omit the final argument. +If you do so, they use a reasonable default. +@xref{Built-in, ,Built-in Functions}, for full details. If arguments +are omitted in calls to user-defined functions, then those arguments are +treated as local variables, initialized to the empty string +(@pxref{User-defined, ,User-defined Functions}). + +Like every other expression, the function call has a value, which is +computed by the function based on the arguments you give it. In this +example, the value of @samp{sqrt(@var{argument})} is the square root of +@var{argument}. A function can also have side effects, such as assigning +values to certain variables or doing I/O. + +Here is a command to read numbers, one number per line, and print the +square root of each one: + +@example +@group +$ awk '@{ print "The square root of", $1, "is", sqrt($1) @}' +1 +@print{} The square root of 1 is 1 +3 +@print{} The square root of 3 is 1.73205 +5 +@print{} The square root of 5 is 2.23607 +@kbd{Control-d} +@end group +@end example + +@node Precedence, , Function Calls, Expressions +@section Operator Precedence (How Operators Nest) +@cindex precedence +@cindex operator precedence + +@dfn{Operator precedence} determines how operators are grouped, when +different operators appear close by in one expression. For example, +@samp{*} has higher precedence than @samp{+}; thus, @samp{a + b * c} +means to multiply @code{b} and @code{c}, and then add @code{a} to the +product (i.e.@: @samp{a + (b * c)}). + +You can overrule the precedence of the operators by using parentheses. +You can think of the precedence rules as saying where the +parentheses are assumed to be if you do not write parentheses yourself. 
In +fact, it is wise to always use parentheses whenever you have an unusual +combination of operators, because other people who read the program may +not remember what the precedence is in this case. You might forget, +too; then you could make a mistake. Explicit parentheses will help prevent +any such mistake. + +When operators of equal precedence are used together, the leftmost +operator groups first, except for the assignment, conditional and +exponentiation operators, which group in the opposite order. +Thus, @samp{a - b + c} groups as @samp{(a - b) + c}, and +@samp{a = b = c} groups as @samp{a = (b = c)}. + +The precedence of prefix unary operators does not matter as long as only +unary operators are involved, because there is only one way to interpret +them---innermost first. Thus, @samp{$++i} means @samp{$(++i)} and +@samp{++$x} means @samp{++($x)}. However, when another operator follows +the operand, then the precedence of the unary operators can matter. +Thus, @samp{$x^2} means @samp{($x)^2}, but @samp{-x^2} means +@samp{-(x^2)}, because @samp{-} has lower precedence than @samp{^} +while @samp{$} has higher precedence. + +Here is a table of @code{awk}'s operators, in order from highest +precedence to lowest: + +@c use @code in the items, looks better in TeX w/o all the quotes +@table @code +@item (@dots{}) +Grouping. + +@item $ +Field. + +@item ++ -- +Increment, decrement. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@item ^ ** +Exponentiation. These operators group right-to-left. +(The @samp{**} operator is not specified by POSIX.) + +@item + - ! +Unary plus, minus, logical ``not''. + +@item * / % +Multiplication, division, modulus. + +@item + - +Addition, subtraction. + +@item @r{Concatenation} +No special token is used to indicate concatenation. +The operands are simply written side by side. + +@item < <= == != +@itemx > >= >> | +Relational, and redirection. 
+The relational operators and the redirections have the same precedence +level. Characters such as @samp{>} serve both as relationals and as +redirections; the context distinguishes between the two meanings. + +Note that the I/O redirection operators in @code{print} and @code{printf} +statements belong to the statement level, not to expressions. The +redirection does not produce an expression which could be the operand of +another operator. As a result, it does not make sense to use a +redirection operator near another operator of lower precedence, without +parentheses. Such combinations, for example @samp{print foo > a ? b : c}, +result in syntax errors. +The correct way to write this statement is @samp{print foo > (a ? b : c)}. + +@item ~ !~ +Matching, non-matching. + +@item in +Array membership. + +@item && +Logical ``and''. + +@item || +Logical ``or''. + +@item ?: +Conditional. This operator groups right-to-left. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@item = += -= *= +@itemx /= %= ^= **= +Assignment. These operators group right-to-left. +(The @samp{**=} operator is not specified by POSIX.) +@end table + +@node Patterns and Actions, Statements, Expressions, Top +@chapter Patterns and Actions +@cindex pattern, definition of + +As you have already seen, each @code{awk} statement consists of +a pattern with an associated action. This chapter describes how +you build patterns and actions. + +@menu +* Pattern Overview:: What goes into a pattern. +* Action Overview:: What goes into an action. +@end menu + +@node Pattern Overview, Action Overview, Patterns and Actions, Patterns and Actions +@section Pattern Elements + +Patterns in @code{awk} control the execution of rules: a rule is +executed when its pattern matches the current input record. This +section explains all about how to write patterns. + +@menu +* Kinds of Patterns:: A list of all kinds of patterns. +* Regexp Patterns:: Using regexps as patterns. 
+* Expression Patterns:: Any expression can be used as a pattern. +* Ranges:: Pairs of patterns specify record ranges. +* BEGIN/END:: Specifying initialization and cleanup rules. +* Empty:: The empty pattern, which matches every record. +@end menu + +@node Kinds of Patterns, Regexp Patterns, Pattern Overview, Pattern Overview +@subsection Kinds of Patterns +@cindex patterns, types of + +Here is a summary of the types of patterns supported in @code{awk}. + +@table @code +@item /@var{regular expression}/ +A regular expression as a pattern. It matches when the text of the +input record fits the regular expression. +(@xref{Regexp, ,Regular Expressions}.) + +@item @var{expression} +A single expression. It matches when its value +is non-zero (if a number) or non-null (if a string). +(@xref{Expression Patterns, ,Expressions as Patterns}.) + +@item @var{pat1}, @var{pat2} +A pair of patterns separated by a comma, specifying a range of records. +The range includes both the initial record that matches @var{pat1}, and +the final record that matches @var{pat2}. +(@xref{Ranges, ,Specifying Record Ranges with Patterns}.) + +@item BEGIN +@itemx END +Special patterns for you to supply start-up or clean-up actions for your +@code{awk} program. +(@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.) + +@item @var{empty} +The empty pattern matches every input record. +(@xref{Empty, ,The Empty Pattern}.) +@end table + +@node Regexp Patterns, Expression Patterns, Kinds of Patterns, Pattern Overview +@subsection Regular Expressions as Patterns + +We have been using regular expressions as patterns since our early examples. +This kind of pattern is simply a regexp constant in the pattern part of +a rule. Its meaning is @samp{$0 ~ /@var{pattern}/}. +The pattern matches when the input record matches the regexp. 
+For example: + +@example +/foo|bar|baz/ @{ buzzwords++ @} +END @{ print buzzwords, "buzzwords seen" @} +@end example + +@node Expression Patterns, Ranges, Regexp Patterns, Pattern Overview +@subsection Expressions as Patterns + +Any @code{awk} expression is valid as an @code{awk} pattern. +Then the pattern matches if the expression's value is non-zero (if a +number) or non-null (if a string). + +The expression is reevaluated each time the rule is tested against a new +input record. If the expression uses fields such as @code{$1}, the +value depends directly on the new input record's text; otherwise, it +depends only on what has happened so far in the execution of the +@code{awk} program, but that may still be useful. + +A very common kind of expression used as a pattern is the comparison +expression, using the comparison operators described in +@ref{Typing and Comparison, ,Variable Typing and Comparison Expressions}. + +Regexp matching and non-matching are also very common expressions. +The left operand of the @samp{~} and @samp{!~} operators is a string. +The right operand is either a constant regular expression enclosed in +slashes (@code{/@var{regexp}/}), or any expression, whose string value +is used as a dynamic regular expression +(@pxref{Computed Regexps, , Using Dynamic Regexps}). + +The following example prints the second field of each input record +whose first field is precisely @samp{foo}. + +@example +$ awk '$1 == "foo" @{ print $2 @}' BBS-list +@end example + +@noindent +(There is no output, since there is no BBS site named ``foo''.) +Contrast this with the following regular expression match, which would +accept any record with a first field that contains @samp{foo}: + +@example +@group +$ awk '$1 ~ /foo/ @{ print $2 @}' BBS-list +@print{} 555-1234 +@print{} 555-6699 +@print{} 555-6480 +@print{} 555-2127 +@end group +@end example + +Boolean expressions are also commonly used as patterns. 
+Whether the pattern +matches an input record depends on whether its subexpressions match. + +For example, the following command prints all records in +@file{BBS-list} that contain both @samp{2400} and @samp{foo}. + +@example +$ awk '/2400/ && /foo/' BBS-list +@print{} fooey 555-1234 2400/1200/300 B +@end example + +The following command prints all records in +@file{BBS-list} that contain @emph{either} @samp{2400} or @samp{foo}, or +both. + +@example +@group +$ awk '/2400/ || /foo/' BBS-list +@print{} alpo-net 555-3412 2400/1200/300 A +@print{} bites 555-1675 2400/1200/300 A +@print{} fooey 555-1234 2400/1200/300 B +@print{} foot 555-6699 1200/300 B +@print{} macfoo 555-6480 1200/300 A +@print{} sdace 555-3430 2400/1200/300 A +@print{} sabafoo 555-2127 1200/300 C +@end group +@end example + +The following command prints all records in +@file{BBS-list} that do @emph{not} contain the string @samp{foo}. + +@example +@group +$ awk '! /foo/' BBS-list +@print{} aardvark 555-5553 1200/300 B +@print{} alpo-net 555-3412 2400/1200/300 A +@print{} barfly 555-7685 1200/300 A +@print{} bites 555-1675 2400/1200/300 A +@print{} camelot 555-0542 300 C +@print{} core 555-2912 1200/300 C +@print{} sdace 555-3430 2400/1200/300 A +@end group +@end example + +The subexpressions of a boolean operator in a pattern can be constant regular +expressions, comparisons, or any other @code{awk} expressions. Range +patterns are not expressions, so they cannot appear inside boolean +patterns. Likewise, the special patterns @code{BEGIN} and @code{END}, +which never match any input record, are not expressions and cannot +appear inside boolean patterns. + +A regexp constant as a pattern is also a special case of an expression +pattern. @code{/foo/} as an expression has the value one if @samp{foo} +appears in the current input record; thus, as a pattern, @code{/foo/} +matches any record containing @samp{foo}. 
+ +@node Ranges, BEGIN/END, Expression Patterns, Pattern Overview +@subsection Specifying Record Ranges with Patterns + +@cindex range pattern +@cindex pattern, range +@cindex matching ranges of lines +A @dfn{range pattern} is made of two patterns separated by a comma, of +the form @samp{@var{begpat}, @var{endpat}}. It matches ranges of +consecutive input records. The first pattern, @var{begpat}, controls +where the range begins, and the second one, @var{endpat}, controls where +it ends. For example, + +@example +awk '$1 == "on", $1 == "off"' +@end example + +@noindent +prints every record between @samp{on}/@samp{off} pairs, inclusive. + +A range pattern starts out by matching @var{begpat} +against every input record; when a record matches @var{begpat}, the +range pattern becomes @dfn{turned on}. The range pattern matches this +record. As long as it stays turned on, it automatically matches every +input record read. It also matches @var{endpat} against +every input record; when that succeeds, the range pattern is turned +off again for the following record. Then it goes back to checking +@var{begpat} against each record. + +The record that turns on the range pattern and the one that turns it +off both match the range pattern. If you don't want to operate on +these records, you can write @code{if} statements in the rule's action +to distinguish them from the records you are interested in. + +It is possible for a pattern to be turned both on and off by the same +record, if the record satisfies both conditions. Then the action is +executed for just that record. + +For example, suppose you have text between two identical markers (say +the @samp{%} symbol) that you wish to ignore. 
You might try to +combine a range pattern that describes the delimited text with the +@code{next} statement +(not discussed yet, @pxref{Next Statement, , The @code{next} Statement}), +which causes @code{awk} to skip any further processing of the current +record and start over again with the next input record. Such a program +would look like this: + +@example +/^%$/,/^%$/ @{ next @} + @{ print @} +@end example + +@noindent +@cindex skipping lines between markers +This program fails because the range pattern is both turned on and turned off +by the first line with just a @samp{%} on it. To accomplish this task, you +must write the program this way, using a flag: + +@example +/^%$/ @{ skip = ! skip; next @} +skip == 1 @{ next @} # skip lines with `skip' set +@end example + +Note that in a range pattern, the @samp{,} has the lowest precedence +(is evaluated last) of all the operators. Thus, for example, the +following program attempts to combine a range pattern with another, +simpler test. + +@example +echo Yes | awk '/1/,/2/ || /Yes/' +@end example + +The author of this program intended it to mean @samp{(/1/,/2/) || /Yes/}. +However, @code{awk} interprets this as @samp{/1/, (/2/ || /Yes/)}. +This cannot be changed or worked around; range patterns do not combine +with other patterns. + +@node BEGIN/END, Empty, Ranges, Pattern Overview +@subsection The @code{BEGIN} and @code{END} Special Patterns + +@cindex @code{BEGIN} special pattern +@cindex pattern, @code{BEGIN} +@cindex @code{END} special pattern +@cindex pattern, @code{END} +@code{BEGIN} and @code{END} are special patterns. They are not used to +match input records. Rather, they supply start-up or +clean-up actions for your @code{awk} script. + +@menu +* Using BEGIN/END:: How and why to use BEGIN/END rules. +* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. 
+@end menu + +@node Using BEGIN/END, I/O And BEGIN/END, BEGIN/END, BEGIN/END +@subsubsection Startup and Cleanup Actions + +A @code{BEGIN} rule is executed, once, before the first input record +has been read. An @code{END} rule is executed, once, after all the +input has been read. For example: + +@example +@group +$ awk ' +> BEGIN @{ print "Analysis of \"foo\"" @} +> /foo/ @{ ++n @} +> END @{ print "\"foo\" appears " n " times." @}' BBS-list +@print{} Analysis of "foo" +@print{} "foo" appears 4 times. +@end group +@end example + +This program finds the number of records in the input file @file{BBS-list} +that contain the string @samp{foo}. The @code{BEGIN} rule prints a title +for the report. There is no need to use the @code{BEGIN} rule to +initialize the counter @code{n} to zero, as @code{awk} does this +automatically (@pxref{Variables}). + +The second rule increments the variable @code{n} every time a +record containing the pattern @samp{foo} is read. The @code{END} rule +prints the value of @code{n} at the end of the run. + +The special patterns @code{BEGIN} and @code{END} cannot be used in ranges +or with boolean operators (indeed, they cannot be used with any operators). + +An @code{awk} program may have multiple @code{BEGIN} and/or @code{END} +rules. They are executed in the order they appear, all the @code{BEGIN} +rules at start-up and all the @code{END} rules at termination. +@code{BEGIN} and @code{END} rules may be intermixed with other rules. +This feature was added in the 1987 version of @code{awk}, and is included +in the POSIX standard. The original (1978) version of @code{awk} +required you to put the @code{BEGIN} rule at the beginning of the +program, and the @code{END} rule at the end, and only allowed one of +each. This is no longer required, but it is a good idea in terms of +program organization and readability. 
+ +Multiple @code{BEGIN} and @code{END} rules are useful for writing +library functions, since each library file can have its own @code{BEGIN} and/or +@code{END} rule to do its own initialization and/or cleanup. Note that +the order in which library functions are named on the command line +controls the order in which their @code{BEGIN} and @code{END} rules are +executed. Therefore you have to be careful to write such rules in +library files so that the order in which they are executed doesn't matter. +@xref{Options, ,Command Line Options}, for more information on +using library functions. +@xref{Library Functions, ,A Library of @code{awk} Functions}, +for a number of useful library functions. + +@cindex dark corner +If an @code{awk} program only has a @code{BEGIN} rule, and no other +rules, then the program exits after the @code{BEGIN} rule has been run. +(The original version of @code{awk} used to keep reading and ignoring input +until end of file was seen.) However, if an @code{END} rule exists, +then the input will be read, even if there are no other rules in +the program. This is necessary in case the @code{END} rule checks the +@code{FNR} and @code{NR} variables (d.c.). + +@code{BEGIN} and @code{END} rules must have actions; there is no default +action for these rules since there is no current record when they run. + +@node I/O And BEGIN/END, , Using BEGIN/END, BEGIN/END +@subsubsection Input/Output from @code{BEGIN} and @code{END} Rules + +@cindex I/O from @code{BEGIN} and @code{END} +There are several (sometimes subtle) issues involved when doing I/O +from a @code{BEGIN} or @code{END} rule. + +The first has to do with the value of @code{$0} in a @code{BEGIN} +rule. Since @code{BEGIN} rules are executed before any input is read, +there simply is no input record, and therefore no fields, when +executing @code{BEGIN} rules. References to @code{$0} and the fields +yield a null string or zero, depending upon the context. 
One way
+to give @code{$0} a real value is to execute a @code{getline} command
+without a variable (@pxref{Getline, ,Explicit Input with @code{getline}}).
+Another way is to simply assign a value to it.
+
+@cindex differences between @code{gawk} and @code{awk}
+The second point is similar to the first, but from the other direction.
+Inside an @code{END} rule, what is the value of @code{$0} and @code{NF}?
+Traditionally, due largely to implementation issues, @code{$0} and
+@code{NF} were @emph{undefined} inside an @code{END} rule.
+The POSIX standard specified that @code{NF} was available in an @code{END}
+rule, containing the number of fields from the last input record.
+Due most probably to an oversight, the standard does not say that @code{$0}
+is also preserved, although logically one would think that it should be.
+In fact, @code{gawk} does preserve the value of @code{$0} for use in
+@code{END} rules. Be aware, however, that Unix @code{awk}, and possibly
+other implementations, do not.
+
+The third point follows from the first two. What is the meaning of
+@samp{print} inside a @code{BEGIN} or @code{END} rule? The meaning is
+the same as always, @samp{print $0}. If @code{$0} is the null string,
+then this prints an empty line. Many longtime @code{awk} programmers
+use @samp{print} in @code{BEGIN} and @code{END} rules, to mean
+@samp{@w{print ""}}, relying on @code{$0} being null. While you might
+generally get away with this in @code{BEGIN} rules, in @code{gawk} at
+least, it is a very bad idea in @code{END} rules. It is also poor
+style, since if you want an empty line in the output, you
+should say so explicitly in your program.
+
+@node Empty, , BEGIN/END, Pattern Overview
+@subsection The Empty Pattern
+
+@cindex empty pattern
+@cindex pattern, empty
+An empty (i.e.@: non-existent) pattern is considered to match @emph{every}
+input record. 
For example, the program: + +@example +awk '@{ print $1 @}' BBS-list +@end example + +@noindent +prints the first field of every record. + +@node Action Overview, , Pattern Overview, Patterns and Actions +@section Overview of Actions +@cindex action, definition of +@cindex curly braces +@cindex action, curly braces +@cindex action, separating statements + +An @code{awk} program or script consists of a series of +rules and function definitions, interspersed. (Functions are +described later. @xref{User-defined, ,User-defined Functions}.) + +A rule contains a pattern and an action, either of which (but not +both) may be +omitted. The purpose of the @dfn{action} is to tell @code{awk} what to do +once a match for the pattern is found. Thus, in outline, an @code{awk} +program generally looks like this: + +@example +@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]} +@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]} +@dots{} +function @var{name}(@var{args}) @{ @dots{} @} +@dots{} +@end example + +An action consists of one or more @code{awk} @dfn{statements}, enclosed +in curly braces (@samp{@{} and @samp{@}}). Each statement specifies one +thing to be done. The statements are separated by newlines or +semicolons. + +The curly braces around an action must be used even if the action +contains only one statement, or even if it contains no statements at +all. However, if you omit the action entirely, omit the curly braces as +well. An omitted action is equivalent to @samp{@{ print $0 @}}. + +@example +/foo/ @{ @} # match foo, do nothing - empty action +/foo/ # match foo, print the record - omitted action +@end example + +Here are the kinds of statements supported in @code{awk}: + +@itemize @bullet +@item +Expressions, which can call functions or assign values to variables +(@pxref{Expressions}). Executing +this kind of statement simply computes the value of the expression. 
+This is useful when the expression has side effects +(@pxref{Assignment Ops, ,Assignment Expressions}). + +@item +Control statements, which specify the control flow of @code{awk} +programs. The @code{awk} language gives you C-like constructs +(@code{if}, @code{for}, @code{while}, and @code{do}) as well as a few +special ones (@pxref{Statements, ,Control Statements in Actions}). + +@item +Compound statements, which consist of one or more statements enclosed in +curly braces. A compound statement is used in order to put several +statements together in the body of an @code{if}, @code{while}, @code{do} +or @code{for} statement. + +@item +Input statements, using the @code{getline} command +(@pxref{Getline, ,Explicit Input with @code{getline}}), the @code{next} +statement (@pxref{Next Statement, ,The @code{next} Statement}), +and the @code{nextfile} statement +(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}). + +@item +Output statements, @code{print} and @code{printf}. +@xref{Printing, ,Printing Output}. + +@item +Deletion statements, for deleting array elements. +@xref{Delete, ,The @code{delete} Statement}. +@end itemize + +@iftex +The next chapter covers control statements in detail. +@end iftex + +@node Statements, Built-in Variables, Patterns and Actions, Top +@chapter Control Statements in Actions +@cindex control statement + +@dfn{Control statements} such as @code{if}, @code{while}, and so on +control the flow of execution in @code{awk} programs. Most of the +control statements in @code{awk} are patterned on similar statements in +C. + +All the control statements start with special keywords such as @code{if} +and @code{while}, to distinguish them from simple expressions. + +@cindex compound statement +@cindex statement, compound +Many control statements contain other statements; for example, the +@code{if} statement contains another statement which may or may not be +executed. The contained statement is called the @dfn{body}. 
If you +want to include more than one statement in the body, group them into a +single @dfn{compound statement} with curly braces, separating them with +newlines or semicolons. + +@menu +* If Statement:: Conditionally execute some @code{awk} + statements. +* While Statement:: Loop until some condition is satisfied. +* Do Statement:: Do specified action while looping until some + condition is satisfied. +* For Statement:: Another looping statement, that provides + initialization and increment clauses. +* Break Statement:: Immediately exit the innermost enclosing loop. +* Continue Statement:: Skip to the end of the innermost enclosing + loop. +* Next Statement:: Stop processing the current input record. +* Nextfile Statement:: Stop processing the current file. +* Exit Statement:: Stop execution of @code{awk}. +@end menu + +@node If Statement, While Statement, Statements, Statements +@section The @code{if}-@code{else} Statement + +@cindex @code{if}-@code{else} statement +The @code{if}-@code{else} statement is @code{awk}'s decision-making +statement. It looks like this: + +@example +if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]} +@end example + +@noindent +The @var{condition} is an expression that controls what the rest of the +statement will do. If @var{condition} is true, @var{then-body} is +executed; otherwise, @var{else-body} is executed. +The @code{else} part of the statement is +optional. The condition is considered false if its value is zero or +the null string, and true otherwise. + +Here is an example: + +@example +if (x % 2 == 0) + print "x is even" +else + print "x is odd" +@end example + +In this example, if the expression @samp{x % 2 == 0} is true (that is, +the value of @code{x} is evenly divisible by two), then the first @code{print} +statement is executed, otherwise the second @code{print} statement is +executed. 
+ +If the @code{else} appears on the same line as @var{then-body}, and +@var{then-body} is not a compound statement (i.e.@: not surrounded by +curly braces), then a semicolon must separate @var{then-body} from +@code{else}. To illustrate this, let's rewrite the previous example: + +@example +if (x % 2 == 0) print "x is even"; else + print "x is odd" +@end example + +@noindent +If you forget the @samp{;}, @code{awk} won't be able to interpret the +statement, and you will get a syntax error. + +We would not actually write this example this way, because a human +reader might fail to see the @code{else} if it were not the first thing +on its line. + +@node While Statement, Do Statement, If Statement, Statements +@section The @code{while} Statement +@cindex @code{while} statement +@cindex loop +@cindex body of a loop + +In programming, a @dfn{loop} means a part of a program that can +be executed two or more times in succession. + +The @code{while} statement is the simplest looping statement in +@code{awk}. It repeatedly executes a statement as long as a condition is +true. It looks like this: + +@example +while (@var{condition}) + @var{body} +@end example + +@noindent +Here @var{body} is a statement that we call the @dfn{body} of the loop, +and @var{condition} is an expression that controls how long the loop +keeps running. + +The first thing the @code{while} statement does is test @var{condition}. +If @var{condition} is true, it executes the statement @var{body}. +@ifinfo +(The @var{condition} is true when the value +is not zero and not a null string.) +@end ifinfo +After @var{body} has been executed, +@var{condition} is tested again, and if it is still true, @var{body} is +executed again. This process repeats until @var{condition} is no longer +true. If @var{condition} is initially false, the body of the loop is +never executed, and @code{awk} continues with the statement following +the loop. + +This example prints the first three fields of each record, one per line. 
+ +@example +awk '@{ i = 1 + while (i <= 3) @{ + print $i + i++ + @} +@}' inventory-shipped +@end example + +@noindent +Here the body of the loop is a compound statement enclosed in braces, +containing two statements. + +The loop works like this: first, the value of @code{i} is set to one. +Then, the @code{while} tests whether @code{i} is less than or equal to +three. This is true when @code{i} equals one, so the @code{i}-th +field is printed. Then the @samp{i++} increments the value of @code{i} +and the loop repeats. The loop terminates when @code{i} reaches four. + +As you can see, a newline is not required between the condition and the +body; but using one makes the program clearer unless the body is a +compound statement or is very simple. The newline after the open-brace +that begins the compound statement is not required either, but the +program would be harder to read without it. + +@node Do Statement, For Statement, While Statement, Statements +@section The @code{do}-@code{while} Statement + +The @code{do} loop is a variation of the @code{while} looping statement. +The @code{do} loop executes the @var{body} once, and then repeats @var{body} +as long as @var{condition} is true. It looks like this: + +@example +@group +do + @var{body} +while (@var{condition}) +@end group +@end example + +Even if @var{condition} is false at the start, @var{body} is executed at +least once (and only once, unless executing @var{body} makes +@var{condition} true). Contrast this with the corresponding +@code{while} statement: + +@example +while (@var{condition}) + @var{body} +@end example + +@noindent +This statement does not execute @var{body} even once if @var{condition} +is false to begin with. + +Here is an example of a @code{do} statement: + +@example +awk '@{ i = 1 + do @{ + print $0 + i++ + @} while (i <= 10) +@}' +@end example + +@noindent +This program prints each input record ten times. 
It isn't a very +realistic example, since in this case an ordinary @code{while} would do +just as well. But this reflects actual experience; there is only +occasionally a real use for a @code{do} statement. + +@node For Statement, Break Statement, Do Statement, Statements +@section The @code{for} Statement +@cindex @code{for} statement + +The @code{for} statement makes it more convenient to count iterations of a +loop. The general form of the @code{for} statement looks like this: + +@example +for (@var{initialization}; @var{condition}; @var{increment}) + @var{body} +@end example + +@noindent +The @var{initialization}, @var{condition} and @var{increment} parts are +arbitrary @code{awk} expressions, and @var{body} stands for any +@code{awk} statement. + +The @code{for} statement starts by executing @var{initialization}. +Then, as long +as @var{condition} is true, it repeatedly executes @var{body} and then +@var{increment}. Typically @var{initialization} sets a variable to +either zero or one, @var{increment} adds one to it, and @var{condition} +compares it against the desired number of iterations. + +Here is an example of a @code{for} statement: + +@example +@group +awk '@{ for (i = 1; i <= 3; i++) + print $i +@}' inventory-shipped +@end group +@end example + +@noindent +This prints the first three fields of each input record, one field per +line. + +You cannot set more than one variable in the +@var{initialization} part unless you use a multiple assignment statement +such as @samp{x = y = 0}, which is possible only if all the initial values +are equal. (But you can initialize additional variables by writing +their assignments as separate statements preceding the @code{for} loop.) + +The same is true of the @var{increment} part; to increment additional +variables, you must write separate statements at the end of the loop. +The C compound expression, using C's comma operator, would be useful in +this context, but it is not supported in @code{awk}. 
+ +Most often, @var{increment} is an increment expression, as in the +example above. But this is not required; it can be any expression +whatever. For example, this statement prints all the powers of two +between one and 100: + +@example +for (i = 1; i <= 100; i *= 2) + print i +@end example + +Any of the three expressions in the parentheses following the @code{for} may +be omitted if there is nothing to be done there. Thus, @w{@samp{for (; x +> 0;)}} is equivalent to @w{@samp{while (x > 0)}}. If the +@var{condition} is omitted, it is treated as @var{true}, effectively +yielding an @dfn{infinite loop} (i.e.@: a loop that will never +terminate). + +In most cases, a @code{for} loop is an abbreviation for a @code{while} +loop, as shown here: + +@example +@var{initialization} +while (@var{condition}) @{ + @var{body} + @var{increment} +@} +@end example + +@noindent +The only exception is when the @code{continue} statement +(@pxref{Continue Statement, ,The @code{continue} Statement}) is used +inside the loop; changing a @code{for} statement to a @code{while} +statement in this way can change the effect of the @code{continue} +statement inside the loop. + +There is an alternate version of the @code{for} loop, for iterating over +all the indices of an array: + +@example +for (i in array) + @var{do something with} array[i] +@end example + +@noindent +@xref{Scanning an Array, ,Scanning All Elements of an Array}, +for more information on this version of the @code{for} loop. + +The @code{awk} language has a @code{for} statement in addition to a +@code{while} statement because often a @code{for} loop is both less work to +type and more natural to think of. Counting the number of iterations is +very common in loops. It can be easier to think of this counting as part +of looping rather than as something to do inside the loop. + +The next section has more complicated examples of @code{for} loops. 
+ +@node Break Statement, Continue Statement, For Statement, Statements +@section The @code{break} Statement +@cindex @code{break} statement +@cindex loops, exiting + +The @code{break} statement jumps out of the innermost @code{for}, +@code{while}, or @code{do} loop that encloses it. The +following example finds the smallest divisor of any integer, and also +identifies prime numbers: + +@example +awk '# find smallest divisor of num + @{ num = $1 + for (div = 2; div*div <= num; div++) + if (num % div == 0) + break + if (num % div == 0) + printf "Smallest divisor of %d is %d\n", num, div + else + printf "%d is prime\n", num + @}' +@end example + +When the remainder is zero in the first @code{if} statement, @code{awk} +immediately @dfn{breaks out} of the containing @code{for} loop. This means +that @code{awk} proceeds immediately to the statement following the loop +and continues processing. (This is very different from the @code{exit} +statement which stops the entire @code{awk} program. +@xref{Exit Statement, ,The @code{exit} Statement}.) + +Here is another program equivalent to the previous one. It illustrates how +the @var{condition} of a @code{for} or @code{while} could just as well be +replaced with a @code{break} inside an @code{if}: + +@example +@group +awk '# find smallest divisor of num + @{ num = $1 + for (div = 2; ; div++) @{ + if (num % div == 0) @{ + printf "Smallest divisor of %d is %d\n", num, div + break + @} + if (div*div > num) @{ + printf "%d is prime\n", num + break + @} + @} +@}' +@end group +@end example + +@cindex @code{break}, outside of loops +@cindex historical features +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@cindex dark corner +As described above, the @code{break} statement has no meaning when +used outside the body of a loop. 
However, although it was never documented, +historical implementations of @code{awk} have treated the @code{break} +statement outside of a loop as if it were a @code{next} statement +(@pxref{Next Statement, ,The @code{next} Statement}). +Recent versions of Unix @code{awk} no longer allow this usage. +@code{gawk} will support this use of @code{break} only if @samp{--traditional} +has been specified on the command line +(@pxref{Options, ,Command Line Options}). +Otherwise, it will be treated as an error, since the POSIX standard +specifies that @code{break} should only be used inside the body of a +loop (d.c.). + +@node Continue Statement, Next Statement, Break Statement, Statements +@section The @code{continue} Statement + +@cindex @code{continue} statement +The @code{continue} statement, like @code{break}, is used only inside +@code{for}, @code{while}, and @code{do} loops. It skips +over the rest of the loop body, causing the next cycle around the loop +to begin immediately. Contrast this with @code{break}, which jumps out +of the loop altogether. + +@c The point of this program was to illustrate the use of continue with +@c a while loop. But Karl Berry points out that that is done adequately +@c below, and that this example is very un-awk-like. So for now, we'll +@c omit it. +@ignore +In Texinfo source files, text that the author wishes to ignore can be +enclosed between lines that start with @samp{@@ignore} and end with +@samp{@@end ignore}. Here is a program that strips out lines between +@samp{@@ignore} and @samp{@@end ignore} pairs. + +@example +BEGIN @{ + while (getline > 0) @{ + if (/^@@ignore/) + ignoring = 1 + else if (/^@@end[ \t]+ignore/) @{ + ignoring = 0 + continue + @} + if (ignoring) + continue + print + @} +@} +@end example + +When an @samp{@@ignore} is seen, the @code{ignoring} flag is set to one (true). +When @samp{@@end ignore} is seen, the flag is reset to zero (false). 
As long +as the flag is true, the input record is not printed, because the +@code{continue} restarts the @code{while} loop, skipping over the @code{print} +statement. + +@c Exercise!!! +@c How could this program be written to make better use of the awk language? +@end ignore + +The @code{continue} statement in a @code{for} loop directs @code{awk} to +skip the rest of the body of the loop, and resume execution with the +increment-expression of the @code{for} statement. The following program +illustrates this fact: + +@example +awk 'BEGIN @{ + for (x = 0; x <= 20; x++) @{ + if (x == 5) + continue + printf "%d ", x + @} + print "" +@}' +@end example + +@noindent +This program prints all the numbers from zero to 20, except for five, for +which the @code{printf} is skipped. Since the increment @samp{x++} +is not skipped, @code{x} does not remain stuck at five. Contrast the +@code{for} loop above with this @code{while} loop: + +@example +awk 'BEGIN @{ + x = 0 + while (x <= 20) @{ + if (x == 5) + continue + printf "%d ", x + x++ + @} + print "" +@}' +@end example + +@noindent +This program loops forever once @code{x} gets to five. + +@cindex @code{continue}, outside of loops +@cindex historical features +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@cindex dark corner +As described above, the @code{continue} statement has no meaning when +used outside the body of a loop. However, although it was never documented, +historical implementations of @code{awk} have treated the @code{continue} +statement outside of a loop as if it were a @code{next} statement +(@pxref{Next Statement, ,The @code{next} Statement}). +Recent versions of Unix @code{awk} no longer allow this usage. +@code{gawk} will support this use of @code{continue} only if +@samp{--traditional} has been specified on the command line +(@pxref{Options, ,Command Line Options}). 
+Otherwise, it will be treated as an error, since the POSIX standard +specifies that @code{continue} should only be used inside the body of a +loop (d.c.). + +@node Next Statement, Nextfile Statement, Continue Statement, Statements +@section The @code{next} Statement +@cindex @code{next} statement + +The @code{next} statement forces @code{awk} to immediately stop processing +the current record and go on to the next record. This means that no +further rules are executed for the current record. The rest of the +current rule's action is not executed either. + +Contrast this with the effect of the @code{getline} function +(@pxref{Getline, ,Explicit Input with @code{getline}}). That too causes +@code{awk} to read the next record immediately, but it does not alter the +flow of control in any way. So the rest of the current action executes +with a new input record. + +At the highest level, @code{awk} program execution is a loop that reads +an input record and then tests each rule's pattern against it. If you +think of this loop as a @code{for} statement whose body contains the +rules, then the @code{next} statement is analogous to a @code{continue} +statement: it skips to the end of the body of this implicit loop, and +executes the increment (which reads another record). + +For example, if your @code{awk} program works only on records with four +fields, and you don't want it to fail when given bad input, you might +use this rule near the beginning of the program: + +@example +@group +NF != 4 @{ + err = sprintf("%s:%d: skipped: NF != 4\n", FILENAME, FNR) + print err > "/dev/stderr" + next +@} +@end group +@end example + +@noindent +so that the following rules will not see the bad record. The error +message is redirected to the standard error output stream, as error +messages should be. @xref{Special Files, ,Special File Names in @code{gawk}}. 
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+According to the POSIX standard, the behavior is undefined if
+the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@code{gawk} will treat it as a syntax error.
+Although POSIX permits it,
+some other @code{awk} implementations don't allow the @code{next}
+statement inside function bodies
+(@pxref{User-defined, ,User-defined Functions}).
+Just as any other @code{next} statement, a @code{next} inside a
+function body reads the next record and starts processing it with the
+first rule in the program.
+
+If the @code{next} statement causes the end of the input to be reached,
+then the code in any @code{END} rules will be executed.
+@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.
+
+@cindex @code{next}, inside a user-defined function
+@strong{Caution:} Some @code{awk} implementations generate a run-time
+error if you use the @code{next} statement inside a user-defined function
+(@pxref{User-defined, , User-defined Functions}).
+@code{gawk} does not have this problem.
+
+@node Nextfile Statement, Exit Statement, Next Statement, Statements
+@section The @code{nextfile} Statement
+@cindex @code{nextfile} statement
+@cindex differences between @code{gawk} and @code{awk}
+
+@code{gawk} provides the @code{nextfile} statement,
+which is similar to the @code{next} statement.
+However, instead of abandoning processing of the current record, the
+@code{nextfile} statement instructs @code{gawk} to stop processing the
+current data file.
+
+Upon execution of the @code{nextfile} statement, @code{FILENAME} is
+updated to the name of the next data file listed on the command line,
+@code{FNR} is reset to one, @code{ARGIND} is incremented, and processing
+starts over with the first rule in the program. @xref{Built-in Variables}.
+
+If the @code{nextfile} statement causes the end of the input to be reached,
+then the code in any @code{END} rules will be executed. 
+@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}. + +The @code{nextfile} statement is a @code{gawk} extension; it is not +(currently) available in any other @code{awk} implementation. +@xref{Nextfile Function, ,Implementing @code{nextfile} as a Function}, +for a user-defined function you can use to simulate the @code{nextfile} +statement. + +The @code{nextfile} statement would be useful if you have many data +files to process, and you expect that you +would not want to process every record in every file. +Normally, in order to move on to +the next data file, you would have to continue scanning the unwanted +records. The @code{nextfile} statement accomplishes this much more +efficiently. + +@cindex @code{next file} statement +@strong{Caution:} Versions of @code{gawk} prior to 3.0 used two +words (@samp{next file}) for the @code{nextfile} statement. This was +changed in 3.0 to one word, since the treatment of @samp{file} was +inconsistent. When it appeared after @code{next}, it was a keyword. +Otherwise, it was a regular identifier. The old usage is still +accepted. However, @code{gawk} will generate a warning message, and +support for @code{next file} will eventually be discontinued in a +future version of @code{gawk}. + +@node Exit Statement, , Nextfile Statement, Statements +@section The @code{exit} Statement + +@cindex @code{exit} statement +The @code{exit} statement causes @code{awk} to immediately stop +executing the current rule and to stop processing input; any remaining input +is ignored. It looks like this: + +@example +exit @r{[}@var{return code}@r{]} +@end example + +If an @code{exit} statement is executed from a @code{BEGIN} rule the +program stops processing everything immediately. No input records are +read. However, if an @code{END} rule is present, it is executed +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). 
+ +If @code{exit} is used as part of an @code{END} rule, it causes +the program to stop immediately. + +An @code{exit} statement that is not part +of a @code{BEGIN} or @code{END} rule stops the execution of any further +automatic rules for the current record, skips reading any remaining input +records, and executes +the @code{END} rule if there is one. + +If you do not want the @code{END} rule to do its job in this case, you +can set a variable to non-zero before the @code{exit} statement, and check +that variable in the @code{END} rule. +@xref{Assert Function, ,Assertions}, +for an example that does this. + +@cindex dark corner +If an argument is supplied to @code{exit}, its value is used as the exit +status code for the @code{awk} process. If no argument is supplied, +@code{exit} returns status zero (success). In the case where an argument +is supplied to a first @code{exit} statement, and then @code{exit} is +called a second time with no argument, the previously supplied exit value +is used (d.c.). + +For example, let's say you've discovered an error condition you really +don't know how to handle. Conventionally, programs report this by +exiting with a non-zero status. Your @code{awk} program can do this +using an @code{exit} statement with a non-zero argument. Here is an +example: + +@example +@group +BEGIN @{ + if (("date" | getline date_now) < 0) @{ + print "Can't get system date" > "/dev/stderr" + exit 1 + @} + print "current date is", date_now + close("date") +@} +@end group +@end example + +@node Built-in Variables, Arrays, Statements, Top +@chapter Built-in Variables +@cindex built-in variables + +Most @code{awk} variables are available for you to use for your own +purposes; they never change except when your program assigns values to +them, and never affect anything except when your program examines them. +However, a few variables in @code{awk} have special built-in meanings. 
+Some of them @code{awk} examines automatically, so that they enable you +to tell @code{awk} how to do certain things. Others are set +automatically by @code{awk}, so that they carry information from the +internal workings of @code{awk} to your program. + +This chapter documents all the built-in variables of @code{gawk}. Most +of them are also documented in the chapters describing their areas of +activity. + +@menu +* User-modified:: Built-in variables that you change to control + @code{awk}. +* Auto-set:: Built-in variables where @code{awk} gives you + information. +* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}. +@end menu + +@node User-modified, Auto-set, Built-in Variables, Built-in Variables +@section Built-in Variables that Control @code{awk} +@cindex built-in variables, user modifiable + +This is an alphabetical list of the variables which you can change to +control how @code{awk} does certain things. Those variables that are +specific to @code{gawk} are marked with an asterisk, @samp{*}. + +@table @code +@vindex CONVFMT +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +@item CONVFMT +This string controls conversion of numbers to +strings (@pxref{Conversion, ,Conversion of Strings and Numbers}). +It works by being passed, in effect, as the first argument to the +@code{sprintf} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +Its default value is @code{"%.6g"}. +@code{CONVFMT} was introduced by the POSIX standard. + +@vindex FIELDWIDTHS +@item FIELDWIDTHS * +This is a space separated list of columns that tells @code{gawk} +how to split input with fixed, columnar boundaries. It is an +experimental feature. Assigning to @code{FIELDWIDTHS} +overrides the use of @code{FS} for field splitting. +@xref{Constant Size, ,Reading Fixed-width Data}, for more information. 
+ +If @code{gawk} is in compatibility mode +(@pxref{Options, ,Command Line Options}), then @code{FIELDWIDTHS} +has no special meaning, and field splitting operations are done based +exclusively on the value of @code{FS}. + +@vindex FS +@item FS +@code{FS} is the input field separator +(@pxref{Field Separators, ,Specifying How Fields are Separated}). +The value is a single-character string or a multi-character regular +expression that matches the separations between fields in an input +record. If the value is the null string (@code{""}), then each +character in the record becomes a separate field. + +The default value is @w{@code{" "}}, a string consisting of a single +space. As a special exception, this value means that any +sequence of spaces, tabs, and/or newlines is a single separator.@footnote{In +POSIX @code{awk}, newline does not count as whitespace.} It also causes +spaces, tabs, and newlines at the beginning and end of a record to be ignored. + +You can set the value of @code{FS} on the command line using the +@samp{-F} option: + +@example +awk -F, '@var{program}' @var{input-files} +@end example + +If @code{gawk} is using @code{FIELDWIDTHS} for field-splitting, +assigning a value to @code{FS} will cause @code{gawk} to return to +the normal, @code{FS}-based, field splitting. An easy way to do this +is to simply say @samp{FS = FS}, perhaps with an explanatory comment. + +@vindex IGNORECASE +@item IGNORECASE * +If @code{IGNORECASE} is non-zero or non-null, then all string comparisons, +and all regular expression matching are case-independent. Thus, regexp +matching with @samp{~} and @samp{!~}, and the @code{gensub}, +@code{gsub}, @code{index}, @code{match}, @code{split} and @code{sub} +functions, record termination with @code{RS}, and field splitting with +@code{FS} all ignore case when doing their particular regexp operations. +The value of @code{IGNORECASE} does @emph{not} affect array subscripting. +@xref{Case-sensitivity, ,Case-sensitivity in Matching}. 
+ +If @code{gawk} is in compatibility mode +(@pxref{Options, ,Command Line Options}), +then @code{IGNORECASE} has no special meaning, and string +and regexp operations are always case-sensitive. + +@vindex OFMT +@item OFMT +This string controls conversion of numbers to +strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for +printing with the @code{print} statement. It works by being passed, in +effect, as the first argument to the @code{sprintf} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +Its default value is @code{"%.6g"}. Earlier versions of @code{awk} +also used @code{OFMT} to specify the format for converting numbers to +strings in general expressions; this is now done by @code{CONVFMT}. + +@vindex OFS +@item OFS +This is the output field separator (@pxref{Output Separators}). It is +output between the fields output by a @code{print} statement. Its +default value is @w{@code{" "}}, a string consisting of a single space. + +@vindex ORS +@item ORS +This is the output record separator. It is output at the end of every +@code{print} statement. Its default value is @code{"\n"}. +(@xref{Output Separators}.) + +@vindex RS +@item RS +This is @code{awk}'s input record separator. Its default value is a string +containing a single newline character, which means that an input record +consists of a single line of text. +It can also be the null string, in which case records are separated by +runs of blank lines, or a regexp, in which case records are separated by +matches of the regexp in the input text. +(@xref{Records, ,How Input is Split into Records}.) + +@vindex SUBSEP +@item SUBSEP +@code{SUBSEP} is the subscript separator. It has the default value of +@code{"\034"}, and is used to separate the parts of the indices of a +multi-dimensional array. Thus, the expression @code{@w{foo["A", "B"]}} +really accesses @code{foo["A\034B"]} +(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}). 
+@end table + +@node Auto-set, ARGC and ARGV, User-modified, Built-in Variables +@section Built-in Variables that Convey Information +@cindex built-in variables, convey information + +This is an alphabetical list of the variables that are set +automatically by @code{awk} on certain occasions in order to provide +information to your program. Those variables that are specific to +@code{gawk} are marked with an asterisk, @samp{*}. + +@table @code +@vindex ARGC +@vindex ARGV +@item ARGC +@itemx ARGV +The command-line arguments available to @code{awk} programs are stored in +an array called @code{ARGV}. @code{ARGC} is the number of command-line +arguments present. @xref{Other Arguments, ,Other Command Line Arguments}. +Unlike most @code{awk} arrays, +@code{ARGV} is indexed from zero to @code{ARGC} @minus{} 1. For example: + +@example +@group +$ awk 'BEGIN @{ +> for (i = 0; i < ARGC; i++) +> print ARGV[i] +> @}' inventory-shipped BBS-list +@print{} awk +@print{} inventory-shipped +@print{} BBS-list +@end group +@end example + +@noindent +In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]} +contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains +@code{"BBS-list"}. The value of @code{ARGC} is three, one more than the +index of the last element in @code{ARGV}, since the elements are numbered +from zero. + +The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing +the array from zero to @code{ARGC} @minus{} 1, are derived from the C language's +method of accessing command line arguments. +@xref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}, for information +about how @code{awk} uses these variables. + +@vindex ARGIND +@item ARGIND * +The index in @code{ARGV} of the current file being processed. +Every time @code{gawk} opens a new data file for processing, it sets +@code{ARGIND} to the index in @code{ARGV} of the file name. 
+When @code{gawk} is processing the input files, it is always +true that @samp{FILENAME == ARGV[ARGIND]}. + +This variable is useful in file processing; it allows you to tell how far +along you are in the list of data files, and to distinguish between +successive instances of the same filename on the command line. + +While you can change the value of @code{ARGIND} within your @code{awk} +program, @code{gawk} will automatically set it to a new value when the +next file is opened. + +This variable is a @code{gawk} extension. In other @code{awk} implementations, +or if @code{gawk} is in compatibility mode +(@pxref{Options, ,Command Line Options}), +it is not special. + +@vindex ENVIRON +@item ENVIRON +An associative array that contains the values of the environment. The array +indices are the environment variable names; the values are the values of +the particular environment variables. For example, +@code{ENVIRON["HOME"]} might be @file{/home/arnold}. Changing this array +does not affect the environment passed on to any programs that +@code{awk} may spawn via redirection or the @code{system} function. +(In a future version of @code{gawk}, it may do so.) + +Some operating systems may not have environment variables. +On such systems, the @code{ENVIRON} array is empty (except for +@w{@code{ENVIRON["AWKPATH"]}}). + +@vindex ERRNO +@item ERRNO * +If a system error occurs either doing a redirection for @code{getline}, +during a read for @code{getline}, or during a @code{close} operation, +then @code{ERRNO} will contain a string describing the error. + +This variable is a @code{gawk} extension. In other @code{awk} implementations, +or if @code{gawk} is in compatibility mode +(@pxref{Options, ,Command Line Options}), +it is not special. + +@cindex dark corner +@vindex FILENAME +@item FILENAME +This is the name of the file that @code{awk} is currently reading. 
+When no data files are listed on the command line, @code{awk} reads +from the standard input, and @code{FILENAME} is set to @code{"-"}. +@code{FILENAME} is changed each time a new file is read +(@pxref{Reading Files, ,Reading Input Files}). +Inside a @code{BEGIN} rule, the value of @code{FILENAME} is +@code{""}, since there are no input files being processed +yet.@footnote{Some early implementations of Unix @code{awk} initialized +@code{FILENAME} to @code{"-"}, even if there were data files to be +processed. This behavior was incorrect, and should not be relied +upon in your programs.} (d.c.) + +@vindex FNR +@item FNR +@code{FNR} is the current record number in the current file. @code{FNR} is +incremented each time a new record is read +(@pxref{Getline, ,Explicit Input with @code{getline}}). It is reinitialized +to zero each time a new input file is started. + +@vindex NF +@item NF +@code{NF} is the number of fields in the current input record. +@code{NF} is set each time a new record is read, when a new field is +created, or when @code{$0} changes (@pxref{Fields, ,Examining Fields}). + +@vindex NR +@item NR +This is the number of input records @code{awk} has processed since +the beginning of the program's execution +(@pxref{Records, ,How Input is Split into Records}). +@code{NR} is set each time a new record is read. + +@vindex RLENGTH +@item RLENGTH +@code{RLENGTH} is the length of the substring matched by the +@code{match} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +@code{RLENGTH} is set by invoking the @code{match} function. Its value +is the length of the matched string, or @minus{}1 if no match was found. + +@vindex RSTART +@item RSTART +@code{RSTART} is the start-index in characters of the substring matched by the +@code{match} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +@code{RSTART} is set by invoking the @code{match} function. 
Its value +is the position of the string where the matched substring starts, or zero +if no match was found. + +@vindex RT +@item RT * +@code{RT} is set each time a record is read. It contains the input text +that matched the text denoted by @code{RS}, the record separator. + +This variable is a @code{gawk} extension. In other @code{awk} implementations, +or if @code{gawk} is in compatibility mode +(@pxref{Options, ,Command Line Options}), +it is not special. +@end table + +@cindex dark corner +A side note about @code{NR} and @code{FNR}. +@code{awk} simply increments both of these variables +each time it reads a record, instead of setting them to the absolute +value of the number of records read. This means that your program can +change these variables, and their new values will be incremented for +each record (d.c.). For example: + +@example +@group +$ echo '1 +> 2 +> 3 +> 4' | awk 'NR == 2 @{ NR = 17 @} +> @{ print NR @}' +@print{} 1 +@print{} 17 +@print{} 18 +@print{} 19 +@end group +@end example + +@noindent +Before @code{FNR} was added to the @code{awk} language +(@pxref{V7/SVR3.1, ,Major Changes between V7 and SVR3.1}), +many @code{awk} programs used this feature to track the number of +records in a file by resetting @code{NR} to zero when @code{FILENAME} +changed. + +@node ARGC and ARGV, , Auto-set, Built-in Variables +@section Using @code{ARGC} and @code{ARGV} + +In @ref{Auto-set, , Built-in Variables that Convey Information}, +you saw this program describing the information contained in @code{ARGC} +and @code{ARGV}: + +@example +@group +$ awk 'BEGIN @{ +> for (i = 0; i < ARGC; i++) +> print ARGV[i] +> @}' inventory-shipped BBS-list +@print{} awk +@print{} inventory-shipped +@print{} BBS-list +@end group +@end example + +@noindent +In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]} +contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains +@code{"BBS-list"}. + +Notice that the @code{awk} program is not entered in @code{ARGV}. 
The +other special command line options, with their arguments, are also not +entered. But variable assignments on the command line @emph{are} +treated as arguments, and do show up in the @code{ARGV} array. + +Your program can alter @code{ARGC} and the elements of @code{ARGV}. +Each time @code{awk} reaches the end of an input file, it uses the next +element of @code{ARGV} as the name of the next input file. By storing a +different string there, your program can change which files are read. +You can use @code{"-"} to represent the standard input. By storing +additional elements and incrementing @code{ARGC} you can cause +additional files to be read. + +If you decrease the value of @code{ARGC}, that eliminates input files +from the end of the list. By recording the old value of @code{ARGC} +elsewhere, your program can treat the eliminated arguments as +something other than file names. + +To eliminate a file from the middle of the list, store the null string +(@code{""}) into @code{ARGV} in place of the file's name. As a +special feature, @code{awk} ignores file names that have been +replaced with the null string. +You may also use the @code{delete} statement to remove elements from +@code{ARGV} (@pxref{Delete, ,The @code{delete} Statement}). + +All of these actions are typically done from the @code{BEGIN} rule, +before actual processing of the input begins. +@xref{Split Program, ,Splitting a Large File Into Pieces}, and see +@ref{Tee Program, ,Duplicating Output Into Multiple Files}, for an example +of each way of removing elements from @code{ARGV}. + +The following fragment processes @code{ARGV} in order to examine, and +then remove, command line options. 
+
+@example
+@group
+BEGIN @{
+    for (i = 1; i < ARGC; i++) @{
+        if (ARGV[i] == "-v")
+            verbose = 1
+        else if (ARGV[i] == "-d")
+            debug = 1
+@end group
+@group
+        else if (ARGV[i] ~ /^-./) @{
+            e = sprintf("%s: unrecognized option -- %c",
+                    ARGV[0], substr(ARGV[i], 2, 1))
+            print e > "/dev/stderr"
+        @} else
+            break
+        delete ARGV[i]
+    @}
+@}
+@end group
+@end example
+
+To actually get the options into the @code{awk} program, you have to
+end the @code{awk} options with @samp{--}, and then supply your options,
+like so:
+
+@example
+awk -f myprog -- -v -d file1 file2 @dots{}
+@end example
+
+@cindex differences between @code{gawk} and @code{awk}
+This is not necessary in @code{gawk}: Unless @samp{--posix} has been
+specified, @code{gawk} silently puts any unrecognized options into
+@code{ARGV} for the @code{awk} program to deal with.
+
+As soon as it
+sees an unknown option, @code{gawk} stops looking for other options it might
+otherwise recognize.  The above example with @code{gawk} would be:
+
+@example
+gawk -f myprog -d -v file1 file2 @dots{}
+@end example
+
+@noindent
+Since @samp{-d} is not a valid @code{gawk} option, the following @samp{-v}
+is passed on to the @code{awk} program.
+
+@node Arrays, Built-in, Built-in Variables, Top
+@chapter Arrays in @code{awk}
+
+An @dfn{array} is a table of values, called @dfn{elements}.  The
+elements of an array are distinguished by their indices.  @dfn{Indices}
+may be either numbers or strings.  @code{awk} maintains a single set
+of names that may be used for naming variables, arrays and functions
+(@pxref{User-defined, ,User-defined Functions}).
+Thus, you cannot have a variable and an array with the same name in the
+same @code{awk} program.
+
+@menu
+* Array Intro::                 Introduction to Arrays
+* Reference to Elements::       How to examine one element of an array.
+* Assigning Elements::          How to change an element of an array.
+* Array Example::               Basic Example of an Array
+* Scanning an Array::           A variation of the @code{for} statement.
It + loops through the indices of an array's + existing elements. +* Delete:: The @code{delete} statement removes an element + from an array. +* Numeric Array Subscripts:: How to use numbers as subscripts in + @code{awk}. +* Uninitialized Subscripts:: Using Uninitialized variables as subscripts. +* Multi-dimensional:: Emulating multi-dimensional arrays in + @code{awk}. +* Multi-scanning:: Scanning multi-dimensional arrays. +@end menu + +@node Array Intro, Reference to Elements, Arrays, Arrays +@section Introduction to Arrays + +@cindex arrays +The @code{awk} language provides one-dimensional @dfn{arrays} for storing groups +of related strings or numbers. + +Every @code{awk} array must have a name. Array names have the same +syntax as variable names; any valid variable name would also be a valid +array name. But you cannot use one name in both ways (as an array and +as a variable) in one @code{awk} program. + +Arrays in @code{awk} superficially resemble arrays in other programming +languages; but there are fundamental differences. In @code{awk}, you +don't need to specify the size of an array before you start to use it. +Additionally, any number or string in @code{awk} may be used as an +array index, not just consecutive integers. + +In most other languages, you have to @dfn{declare} an array and specify +how many elements or components it contains. In such languages, the +declaration causes a contiguous block of memory to be allocated for that +many elements. An index in the array usually must be a positive integer; for +example, the index zero specifies the first element in the array, which is +actually stored at the beginning of the block of memory. Index one +specifies the second element, which is stored in memory right after the +first element, and so on. It is impossible to add more elements to the +array, because it has room for only as many elements as you declared. +(Some languages allow arbitrary starting and ending indices, +e.g., @samp{15 .. 
27}, but the size of the array is still fixed when +the array is declared.) + +A contiguous array of four elements might look like this, +conceptually, if the element values are eight, @code{"foo"}, +@code{""} and 30: + +@iftex +@c from Karl Berry, much thanks for the help. +@tex +\bigskip % space above the table (about 1 linespace) +\offinterlineskip +\newdimen\width \width = 1.5cm +\newdimen\hwidth \hwidth = 4\width \advance\hwidth by 2pt % 5 * 0.4pt +\centerline{\vbox{ +\halign{\strut\hfil\ignorespaces#&&\vrule#&\hbox to\width{\hfil#\unskip\hfil}\cr +\noalign{\hrule width\hwidth} + &&{\tt 8} &&{\tt "foo"} &&{\tt ""} &&{\tt 30} &&\quad value\cr +\noalign{\hrule width\hwidth} +\noalign{\smallskip} + &\omit&0&\omit &1 &\omit&2 &\omit&3 &\omit&\quad index\cr +} +}} +@end tex +@end iftex +@ifinfo +@example ++---------+---------+--------+---------+ +| 8 | "foo" | "" | 30 | @r{value} ++---------+---------+--------+---------+ + 0 1 2 3 @r{index} +@end example +@end ifinfo + +@noindent +Only the values are stored; the indices are implicit from the order of +the values. Eight is the value at index zero, because eight appears in the +position with zero elements before it. + +@cindex arrays, definition of +@cindex associative arrays +@cindex arrays, associative +Arrays in @code{awk} are different: they are @dfn{associative}. This means +that each array is a collection of pairs: an index, and its corresponding +array element value: + +@example +@r{Element} 4 @r{Value} 30 +@r{Element} 2 @r{Value} "foo" +@r{Element} 1 @r{Value} 8 +@r{Element} 3 @r{Value} "" +@end example + +@noindent +We have shown the pairs in jumbled order because their order is irrelevant. + +One advantage of associative arrays is that new pairs can be added +at any time. For example, suppose we add to the above array a tenth element +whose value is @w{@code{"number ten"}}. 
The result is this: + +@example +@r{Element} 10 @r{Value} "number ten" +@r{Element} 4 @r{Value} 30 +@r{Element} 2 @r{Value} "foo" +@r{Element} 1 @r{Value} 8 +@r{Element} 3 @r{Value} "" +@end example + +@noindent +@cindex sparse arrays +@cindex arrays, sparse +Now the array is @dfn{sparse}, which just means some indices are missing: +it has elements 1--4 and 10, but doesn't have elements 5, 6, 7, 8, or 9. +@c ok, I should spell out the above, but ... + +Another consequence of associative arrays is that the indices don't +have to be positive integers. Any number, or even a string, can be +an index. For example, here is an array which translates words from +English into French: + +@example +@r{Element} "dog" @r{Value} "chien" +@r{Element} "cat" @r{Value} "chat" +@r{Element} "one" @r{Value} "un" +@r{Element} 1 @r{Value} "un" +@end example + +@noindent +Here we decided to translate the number one in both spelled-out and +numeric form---thus illustrating that a single array can have both +numbers and strings as indices. +(In fact, array subscripts are always strings; this is discussed +in more detail in +@ref{Numeric Array Subscripts, ,Using Numbers to Subscript Arrays}.) + +@cindex Array subscripts and @code{IGNORECASE} +@cindex @code{IGNORECASE} and array subscripts +@vindex IGNORECASE +The value of @code{IGNORECASE} has no effect upon array subscripting. +You must use the exact same string value to retrieve an array element +as you used to store it. + +When @code{awk} creates an array for you, e.g., with the @code{split} +built-in function, +that array's indices are consecutive integers starting at one. +(@xref{String Functions, ,Built-in Functions for String Manipulation}.) + +@node Reference to Elements, Assigning Elements, Array Intro, Arrays +@section Referring to an Array Element +@cindex array reference +@cindex element of array +@cindex reference to array + +The principal way of using an array is to refer to one of its elements. 
+An array reference is an expression which looks like this: + +@example +@var{array}[@var{index}] +@end example + +@noindent +Here, @var{array} is the name of an array. The expression @var{index} is +the index of the element of the array that you want. + +The value of the array reference is the current value of that array +element. For example, @code{foo[4.3]} is an expression for the element +of array @code{foo} at index @samp{4.3}. + +If you refer to an array element that has no recorded value, the value +of the reference is @code{""}, the null string. This includes elements +to which you have not assigned any value, and elements that have been +deleted (@pxref{Delete, ,The @code{delete} Statement}). Such a reference +automatically creates that array element, with the null string as its value. +(In some cases, this is unfortunate, because it might waste memory inside +@code{awk}.) + +@cindex arrays, presence of elements +@cindex arrays, the @code{in} operator +You can find out if an element exists in an array at a certain index with +the expression: + +@example +@var{index} in @var{array} +@end example + +@noindent +This expression tests whether or not the particular index exists, +without the side effect of creating that element if it is not present. +The expression has the value one (true) if @code{@var{array}[@var{index}]} +exists, and zero (false) if it does not exist. + +For example, to test whether the array @code{frequencies} contains the +index @samp{2}, you could write this statement: + +@example +if (2 in frequencies) + print "Subscript 2 is present." +@end example + +Note that this is @emph{not} a test of whether or not the array +@code{frequencies} contains an element whose @emph{value} is two. +(There is no way to do that except to scan all the elements.) Also, this +@emph{does not} create @code{frequencies[2]}, while the following +(incorrect) alternative would do so: + +@example +if (frequencies[2] != "") + print "Subscript 2 is present." 
+@end example + +@node Assigning Elements, Array Example, Reference to Elements, Arrays +@section Assigning Array Elements +@cindex array assignment +@cindex element assignment + +Array elements are lvalues: they can be assigned values just like +@code{awk} variables: + +@example +@var{array}[@var{subscript}] = @var{value} +@end example + +@noindent +Here @var{array} is the name of your array. The expression +@var{subscript} is the index of the element of the array that you want +to assign a value. The expression @var{value} is the value you are +assigning to that element of the array. + +@node Array Example, Scanning an Array, Assigning Elements, Arrays +@section Basic Array Example + +The following program takes a list of lines, each beginning with a line +number, and prints them out in order of line number. The line numbers are +not in order, however, when they are first read: they are scrambled. This +program sorts the lines by making an array using the line numbers as +subscripts. It then prints out the lines in sorted order of their numbers. +It is a very simple program, and gets confused if it encounters repeated +numbers, gaps, or lines that don't begin with a number. + +@example +@c file eg/misc/arraymax.awk +@{ + if ($1 > max) + max = $1 + arr[$1] = $0 +@} + +END @{ + for (x = 1; x <= max; x++) + print arr[x] +@} +@c endfile +@end example + +The first rule keeps track of the largest line number seen so far; +it also stores each line into the array @code{arr}, at an index that +is the line's number. + +The second rule runs after all the input has been read, to print out +all the lines. + +When this program is run with the following input: + +@example +@group +@c file eg/misc/arraymax.data +5 I am the Five man +2 Who are you? The new number two! +4 . . . And four on the floor +1 Who is number one? +3 I three you. +@c endfile +@end group +@end example + +@noindent +its output is this: + +@example +1 Who is number one? +2 Who are you? The new number two! 
+3 I three you. +4 . . . And four on the floor +5 I am the Five man +@end example + +If a line number is repeated, the last line with a given number overrides +the others. + +Gaps in the line numbers can be handled with an easy improvement to the +program's @code{END} rule: + +@example +END @{ + for (x = 1; x <= max; x++) + if (x in arr) + print arr[x] +@} +@end example + +@node Scanning an Array, Delete, Array Example, Arrays +@section Scanning All Elements of an Array +@cindex @code{for (x in @dots{})} +@cindex arrays, special @code{for} statement +@cindex scanning an array + +In programs that use arrays, you often need a loop that executes +once for each element of an array. In other languages, where arrays are +contiguous and indices are limited to positive integers, this is +easy: you can +find all the valid indices by counting from the lowest index +up to the highest. This +technique won't do the job in @code{awk}, since any number or string +can be an array index. So @code{awk} has a special kind of @code{for} +statement for scanning an array: + +@example +for (@var{var} in @var{array}) + @var{body} +@end example + +@noindent +This loop executes @var{body} once for each index in @var{array} that your +program has previously used, with the +variable @var{var} set to that index. + +Here is a program that uses this form of the @code{for} statement. The +first rule scans the input records and notes which words appear (at +least once) in the input, by storing a one into the array @code{used} with +the word as index. The second rule scans the elements of @code{used} to +find all the distinct words that appear in the input. It prints each +word that is more than 10 characters long, and also prints the number of +such words. @xref{String Functions, ,Built-in Functions for String Manipulation}, for more information +on the built-in function @code{length}. + +@example +# Record a 1 for each word that is used at least once. 
+@{ + for (i = 1; i <= NF; i++) + used[$i] = 1 +@} + +# Find number of distinct words more than 10 characters long. +END @{ + for (x in used) + if (length(x) > 10) @{ + ++num_long_words + print x + @} + print num_long_words, "words longer than 10 characters" +@} +@end example + +@noindent +@xref{Word Sorting, ,Generating Word Usage Counts}, +for a more detailed example of this type. + +The order in which elements of the array are accessed by this statement +is determined by the internal arrangement of the array elements within +@code{awk} and cannot be controlled or changed. This can lead to +problems if new elements are added to @var{array} by statements in +the loop body; you cannot predict whether or not the @code{for} loop will +reach them. Similarly, changing @var{var} inside the loop may produce +strange results. It is best to avoid such things. + +@node Delete, Numeric Array Subscripts, Scanning an Array, Arrays +@section The @code{delete} Statement +@cindex @code{delete} statement +@cindex deleting elements of arrays +@cindex removing elements of arrays +@cindex arrays, deleting an element + +You can remove an individual element of an array using the @code{delete} +statement: + +@example +delete @var{array}[@var{index}] +@end example + +Once you have deleted an array element, you can no longer obtain any +value the element once had. It is as if you had never referred +to it and had never given it any value. + +Here is an example of deleting elements in an array: + +@example +for (i in frequencies) + delete frequencies[i] +@end example + +@noindent +This example removes all the elements from the array @code{frequencies}. 
+ +If you delete an element, a subsequent @code{for} statement to scan the array +will not report that element, and the @code{in} operator to check for +the presence of that element will return zero (i.e.@: false): + +@example +delete foo[4] +if (4 in foo) + print "This will never be printed" +@end example + +It is important to note that deleting an element is @emph{not} the +same as assigning it a null value (the empty string, @code{""}). + +@example +foo[4] = "" +if (4 in foo) + print "This is printed, even though foo[4] is empty" +@end example + +It is not an error to delete an element that does not exist. + +@cindex arrays, deleting entire contents +@cindex deleting entire arrays +@cindex differences between @code{gawk} and @code{awk} +You can delete all the elements of an array with a single statement, +by leaving off the subscript in the @code{delete} statement. + +@example +delete @var{array} +@end example + +This ability is a @code{gawk} extension; it is not available in +compatibility mode (@pxref{Options, ,Command Line Options}). + +Using this version of the @code{delete} statement is about three times +more efficient than the equivalent loop that deletes each element one +at a time. + +@cindex portability issues +The following statement provides a portable, but non-obvious way to clear +out an array. + +@cindex Brennan, Michael +@example +@group +# thanks to Michael Brennan for pointing this out +split("", array) +@end group +@end example + +The @code{split} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}) +clears out the target array first. This call asks it to split +apart the null string. Since there is no data to split out, the +function simply clears the array and then returns. + +@node Numeric Array Subscripts, Uninitialized Subscripts, Delete, Arrays +@section Using Numbers to Subscript Arrays + +An important aspect of arrays to remember is that @emph{array subscripts +are always strings}. 
If you use a numeric value as a subscript, +it will be converted to a string value before it is used for subscripting +(@pxref{Conversion, ,Conversion of Strings and Numbers}). + +@cindex conversions, during subscripting +@cindex numbers, used as subscripts +@vindex CONVFMT +This means that the value of the built-in variable @code{CONVFMT} can potentially +affect how your program accesses elements of an array. For example: + +@example +xyz = 12.153 +data[xyz] = 1 +CONVFMT = "%2.2f" +@group +if (xyz in data) + printf "%s is in data\n", xyz +else + printf "%s is not in data\n", xyz +@end group +@end example + +@noindent +This prints @samp{12.15 is not in data}. The first statement gives +@code{xyz} a numeric value. Assigning to +@code{data[xyz]} subscripts @code{data} with the string value @code{"12.153"} +(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}), +and assigns one to @code{data["12.153"]}. The program then changes +the value of @code{CONVFMT}. The test @samp{(xyz in data)} generates a new +string value from @code{xyz}, this time @code{"12.15"}, since the value of +@code{CONVFMT} only allows two significant digits. This test fails, +since @code{"12.15"} is a different string from @code{"12.153"}. + +According to the rules for conversions +(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer +values are always converted to strings as integers, no matter what the +value of @code{CONVFMT} may happen to be. So the usual case of: + +@example +for (i = 1; i <= maxsub; i++) + @i{do something with} array[i] +@end example + +@noindent +will work, no matter what the value of @code{CONVFMT}. + +Like many things in @code{awk}, the majority of the time things work +as you would expect them to work. But it is useful to have a precise +knowledge of the actual rules, since sometimes they can have a subtle +effect on your programs. 
+ +@node Uninitialized Subscripts, Multi-dimensional, Numeric Array Subscripts, Arrays +@section Using Uninitialized Variables as Subscripts + +@cindex uninitialized variables, as array subscripts +@cindex array subscripts, uninitialized variables +Suppose you want to print your input data in reverse order. +A reasonable attempt at a program to do so (with some test +data) might look like this: + +@example +@group +$ echo 'line 1 +> line 2 +> line 3' | awk '@{ l[lines] = $0; ++lines @} +> END @{ +> for (i = lines-1; i >= 0; --i) +> print l[i] +> @}' +@print{} line 3 +@print{} line 2 +@end group +@end example + +Unfortunately, the very first line of input data did not come out in the +output! + +At first glance, this program should have worked. The variable @code{lines} +is uninitialized, and uninitialized variables have the numeric value zero. +So, the value of @code{l[0]} should have been printed. + +The issue here is that subscripts for @code{awk} arrays are @strong{always} +strings. And uninitialized variables, when used as strings, have the +value @code{""}, not zero. Thus, @samp{line 1} ended up stored in +@code{l[""]}. + +The following version of the program works correctly: + +@example +@{ l[lines++] = $0 @} +END @{ + for (i = lines - 1; i >= 0; --i) + print l[i] +@} +@end example + +Here, the @samp{++} forces @code{lines} to be numeric, thus making +the ``old value'' numeric zero, which is then converted to @code{"0"} +as the array subscript. + +@cindex null string, as array subscript +@cindex dark corner +As we have just seen, even though it is somewhat unusual, the null string +(@code{""}) is a valid array subscript (d.c.). If @samp{--lint} is provided +on the command line (@pxref{Options, ,Command Line Options}), +@code{gawk} will warn about the use of the null string as a subscript. 
+ +@node Multi-dimensional, Multi-scanning, Uninitialized Subscripts, Arrays +@section Multi-dimensional Arrays + +@cindex subscripts in arrays +@cindex arrays, multi-dimensional subscripts +@cindex multi-dimensional subscripts +A multi-dimensional array is an array in which an element is identified +by a sequence of indices, instead of a single index. For example, a +two-dimensional array requires two indices. The usual way (in most +languages, including @code{awk}) to refer to an element of a +two-dimensional array named @code{grid} is with +@code{grid[@var{x},@var{y}]}. + +@vindex SUBSEP +Multi-dimensional arrays are supported in @code{awk} through +concatenation of indices into one string. What happens is that +@code{awk} converts the indices into strings +(@pxref{Conversion, ,Conversion of Strings and Numbers}) and +concatenates them together, with a separator between them. This creates +a single string that describes the values of the separate indices. The +combined string is used as a single index into an ordinary, +one-dimensional array. The separator used is the value of the built-in +variable @code{SUBSEP}. + +For example, suppose we evaluate the expression @samp{foo[5,12] = "value"} +when the value of @code{SUBSEP} is @code{"@@"}. The numbers five and 12 are +converted to strings and +concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus, +the array element @code{foo["5@@12"]} is set to @code{"value"}. + +Once the element's value is stored, @code{awk} has no record of whether +it was stored with a single index or a sequence of indices. The two +expressions @samp{foo[5,12]} and @w{@samp{foo[5 SUBSEP 12]}} are always +equivalent. + +The default value of @code{SUBSEP} is the string @code{"\034"}, +which contains a non-printing character that is unlikely to appear in an +@code{awk} program or in most input data. 
+ +The usefulness of choosing an unlikely character comes from the fact +that index values that contain a string matching @code{SUBSEP} lead to +combined strings that are ambiguous. Suppose that @code{SUBSEP} were +@code{"@@"}; then @w{@samp{foo["a@@b", "c"]}} and @w{@samp{foo["a", +"b@@c"]}} would be indistinguishable because both would actually be +stored as @samp{foo["a@@b@@c"]}. + +You can test whether a particular index-sequence exists in a +``multi-dimensional'' array with the same operator @samp{in} used for single +dimensional arrays. Instead of a single index as the left-hand operand, +write the whole sequence of indices, separated by commas, in +parentheses: + +@example +(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array} +@end example + +The following example treats its input as a two-dimensional array of +fields; it rotates this array 90 degrees clockwise and prints the +result. It assumes that all lines have the same number of +elements. + +@example +@group +awk '@{ + if (max_nf < NF) + max_nf = NF + max_nr = NR + for (x = 1; x <= NF; x++) + vector[x, NR] = $x +@} +@end group + +@group +END @{ + for (x = 1; x <= max_nf; x++) @{ + for (y = max_nr; y >= 1; --y) + printf("%s ", vector[x, y]) + printf("\n") + @} +@}' +@end group +@end example + +@noindent +When given the input: + +@example +@group +1 2 3 4 5 6 +2 3 4 5 6 1 +3 4 5 6 1 2 +4 5 6 1 2 3 +@end group +@end example + +@noindent +it produces: + +@example +@group +4 3 2 1 +5 4 3 2 +6 5 4 3 +1 6 5 4 +2 1 6 5 +3 2 1 6 +@end group +@end example + +@node Multi-scanning, , Multi-dimensional, Arrays +@section Scanning Multi-dimensional Arrays + +There is no special @code{for} statement for scanning a +``multi-dimensional'' array; there cannot be one, because in truth there +are no multi-dimensional arrays or elements; there is only a +multi-dimensional @emph{way of accessing} an array. 
+ +However, if your program has an array that is always accessed as +multi-dimensional, you can get the effect of scanning it by combining +the scanning @code{for} statement +(@pxref{Scanning an Array, ,Scanning All Elements of an Array}) with the +@code{split} built-in function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +It works like this: + +@example +for (combined in array) @{ + split(combined, separate, SUBSEP) + @dots{} +@} +@end example + +@noindent +This sets @code{combined} to +each concatenated, combined index in the array, and splits it +into the individual indices by breaking it apart where the value of +@code{SUBSEP} appears. The split-out indices become the elements of +the array @code{separate}. + +Thus, suppose you have previously stored a value in @code{array[1, "foo"]}; +then an element with index @code{"1\034foo"} exists in +@code{array}. (Recall that the default value of @code{SUBSEP} is +the character with code 034.) Sooner or later the @code{for} statement +will find that index and do an iteration with @code{combined} set to +@code{"1\034foo"}. Then the @code{split} function is called as +follows: + +@example +split("1\034foo", separate, "\034") +@end example + +@noindent +The result of this is to set @code{separate[1]} to @code{"1"} and +@code{separate[2]} to @code{"foo"}. Presto, the original sequence of +separate indices has been recovered. + +@node Built-in, User-defined, Arrays, Top +@chapter Built-in Functions + +@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!! +@cindex built-in functions +@dfn{Built-in} functions are functions that are always available for +your @code{awk} program to call. This chapter defines all the built-in +functions in @code{awk}; some of them are mentioned in other sections, +but they are summarized here for your convenience. (You can also define +new functions yourself. @xref{User-defined, ,User-defined Functions}.) 
+ +@menu +* Calling Built-in:: How to call built-in functions. +* Numeric Functions:: Functions that work with numbers, including + @code{int}, @code{sin} and @code{rand}. +* String Functions:: Functions for string manipulation, such as + @code{split}, @code{match}, and + @code{sprintf}. +* I/O Functions:: Functions for files and shell commands. +* Time Functions:: Functions for dealing with time stamps. +@end menu + +@node Calling Built-in, Numeric Functions, Built-in, Built-in +@section Calling Built-in Functions + +To call a built-in function, write the name of the function followed +by arguments in parentheses. For example, @samp{atan2(y + z, 1)} +is a call to the function @code{atan2}, with two arguments. + +Whitespace is ignored between the built-in function name and the +open-parenthesis, but we recommend that you avoid using whitespace +there. User-defined functions do not permit whitespace in this way, and +you will find it easier to avoid mistakes by following a simple +convention which always works: no whitespace after a function name. + +@cindex differences between @code{gawk} and @code{awk} +Each built-in function accepts a certain number of arguments. +In some cases, arguments can be omitted. The defaults for omitted +arguments vary from function to function and are described under the +individual functions. In some @code{awk} implementations, extra +arguments given to built-in functions are ignored. However, in @code{gawk}, +it is a fatal error to give extra arguments to a built-in function. + +When a function is called, expressions that create the function's actual +parameters are evaluated completely before the function call is performed. +For example, in the code fragment: + +@example +i = 4 +j = sqrt(i++) +@end example + +@noindent +the variable @code{i} is set to five before @code{sqrt} is called +with a value of four for its actual parameter. 
+
+@cindex evaluation, order of
+@cindex order of evaluation
+The order of evaluation of the expressions used for the function's
+parameters is undefined. Thus, you should not write programs that
+assume that parameters are evaluated from left to right or from
+right to left. For example,
+
+@example
+i = 5
+j = atan2(++i, i *= 2)
+@end example
+
+If the order of evaluation is left to right, then @code{i} first becomes
+six, and then 12, and @code{atan2} is called with the two arguments six
+and 12. But if the order of evaluation is right to left, @code{i}
+first becomes 10, and then 11, and @code{atan2} is called with the
+two arguments 11 and 10.
+
+@node Numeric Functions, String Functions, Calling Built-in, Built-in
+@section Numeric Built-in Functions
+
+Here is a full list of built-in functions that work with numbers.
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item int(@var{x})
+@findex int
+This produces the nearest integer to @var{x}, located between @var{x} and zero,
+truncated toward zero.
+
+For example, @code{int(3)} is three, @code{int(3.9)} is three, @code{int(-3.9)}
+is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
+
+@item sqrt(@var{x})
+@findex sqrt
+This gives you the positive square root of @var{x}. It reports an error
+if @var{x} is negative. Thus, @code{sqrt(4)} is two.
+
+@item exp(@var{x})
+@findex exp
+This gives you the exponential of @var{x} (@code{e ^ @var{x}}), or reports
+an error if @var{x} is out of range. The range of values @var{x} can have
+depends on your machine's floating point representation.
+
+@item log(@var{x})
+@findex log
+This gives you the natural logarithm of @var{x}, if @var{x} is positive;
+otherwise, it reports an error.
+
+@item sin(@var{x})
+@findex sin
+This gives you the sine of @var{x}, with @var{x} in radians.
+
+@item cos(@var{x})
+@findex cos
+This gives you the cosine of @var{x}, with @var{x} in radians.
+ +@item atan2(@var{y}, @var{x}) +@findex atan2 +This gives you the arctangent of @code{@var{y} / @var{x}} in radians. + +@item rand() +@findex rand +This gives you a random number. The values of @code{rand} are +uniformly-distributed between zero and one. +The value is never zero and never one. + +Often you want random integers instead. Here is a user-defined function +you can use to obtain a random non-negative integer less than @var{n}: + +@example +function randint(n) @{ + return int(n * rand()) +@} +@end example + +@noindent +The multiplication produces a random real number greater than zero and less +than @code{n}. We then make it an integer (using @code{int}) between zero +and @code{n} @minus{} 1, inclusive. + +Here is an example where a similar function is used to produce +random integers between one and @var{n}. This program +prints a new random number for each input record. + +@example +@group +awk ' +# Function to roll a simulated die. +function roll(n) @{ return 1 + int(rand() * n) @} +@end group + +@group +# Roll 3 six-sided dice and +# print total number of points. +@{ + printf("%d points\n", + roll(6)+roll(6)+roll(6)) +@}' +@end group +@end example + +@cindex seed for random numbers +@cindex random numbers, seed of +@comment MAWK uses a different seed each time. +@strong{Caution:} In most @code{awk} implementations, including @code{gawk}, +@code{rand} starts generating numbers from the same +starting number, or @dfn{seed}, each time you run @code{awk}. Thus, +a program will generate the same results each time you run it. +The numbers are random within one @code{awk} run, but predictable +from run to run. This is convenient for debugging, but if you want +a program to do different things each time it is used, you must change +the seed to a value that will be different in each run. To do this, +use @code{srand}. 
+ +@item srand(@r{[}@var{x}@r{]}) +@findex srand +The function @code{srand} sets the starting point, or seed, +for generating random numbers to the value @var{x}. + +Each seed value leads to a particular sequence of random +numbers.@footnote{Computer generated random numbers really are not truly +random. They are technically known as ``pseudo-random.'' This means +that while the numbers in a sequence appear to be random, you can in +fact generate the same sequence of random numbers over and over again.} +Thus, if you set the seed to the same value a second time, you will get +the same sequence of random numbers again. + +If you omit the argument @var{x}, as in @code{srand()}, then the current +date and time of day are used for a seed. This is the way to get random +numbers that are truly unpredictable. + +The return value of @code{srand} is the previous seed. This makes it +easy to keep track of the seeds for use in consistently reproducing +sequences of random numbers. +@end table + +@node String Functions, I/O Functions, Numeric Functions, Built-in +@section Built-in Functions for String Manipulation + +The functions in this section look at or change the text of one or more +strings. +Optional parameters are enclosed in square brackets (``['' and ``]''). + +@table @code +@item index(@var{in}, @var{find}) +@findex index +This searches the string @var{in} for the first occurrence of the string +@var{find}, and returns the position in characters where that occurrence +begins in the string @var{in}. For example: + +@example +$ awk 'BEGIN @{ print index("peanut", "an") @}' +@print{} 3 +@end example + +@noindent +If @var{find} is not found, @code{index} returns zero. +(Remember that string indices in @code{awk} start at one.) + +@item length(@r{[}@var{string}@r{]}) +@findex length +This gives you the number of characters in @var{string}. If +@var{string} is a number, the length of the digit string representing +that number is returned. 
For example, @code{length("abcde")} is five. By +contrast, @code{length(15 * 35)} works out to three. How? Well, 15 * 35 = +525, and 525 is then converted to the string @code{"525"}, which has +three characters. + +If no argument is supplied, @code{length} returns the length of @code{$0}. + +@cindex historical features +@cindex portability issues +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +In older versions of @code{awk}, you could call the @code{length} function +without any parentheses. Doing so is marked as ``deprecated'' in the +POSIX standard. This means that while you can do this in your +programs, it is a feature that can eventually be removed from a future +version of the standard. Therefore, for maximal portability of your +@code{awk} programs, you should always supply the parentheses. + +@item match(@var{string}, @var{regexp}) +@findex match +The @code{match} function searches the string, @var{string}, for the +longest, leftmost substring matched by the regular expression, +@var{regexp}. It returns the character position, or @dfn{index}, of +where that substring begins (one, if it starts at the beginning of +@var{string}). If no match is found, it returns zero. + +@vindex RSTART +@vindex RLENGTH +The @code{match} function sets the built-in variable @code{RSTART} to +the index. It also sets the built-in variable @code{RLENGTH} to the +length in characters of the matched substring. If no match is found, +@code{RSTART} is set to zero, and @code{RLENGTH} to @minus{}1. + +For example: + +@example +@group +@c file eg/misc/findpat.sh +awk '@{ + if ($1 == "FIND") + regex = $2 + else @{ + where = match($0, regex) + if (where != 0) + print "Match of", regex, "found at", \ + where, "in", $0 + @} +@}' +@c endfile +@end group +@end example + +@noindent +This program looks for lines that match the regular expression stored in +the variable @code{regex}. This regular expression can be changed. 
If the +first word on a line is @samp{FIND}, @code{regex} is changed to be the +second word on that line. Therefore, given: + +@example +@c file eg/misc/findpat.data +FIND ru+n +My program runs +but not very quickly +FIND Melvin +JF+KM +This line is property of Reality Engineering Co. +Melvin was here. +@c endfile +@end example + +@noindent +@code{awk} prints: + +@example +Match of ru+n found at 12 in My program runs +Match of Melvin found at 1 in Melvin was here. +@end example + +@item split(@var{string}, @var{array} @r{[}, @var{fieldsep}@r{]}) +@findex split +This divides @var{string} into pieces separated by @var{fieldsep}, +and stores the pieces in @var{array}. The first piece is stored in +@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so +forth. The string value of the third argument, @var{fieldsep}, is +a regexp describing where to split @var{string} (much as @code{FS} can +be a regexp describing where to split input records). If +the @var{fieldsep} is omitted, the value of @code{FS} is used. +@code{split} returns the number of elements created. + +The @code{split} function splits strings into pieces in a +manner similar to the way input lines are split into fields. For example: + +@example +split("cul-de-sac", a, "-") +@end example + +@noindent +splits the string @samp{cul-de-sac} into three fields using @samp{-} as the +separator. It sets the contents of the array @code{a} as follows: + +@example +a[1] = "cul" +a[2] = "de" +a[3] = "sac" +@end example + +@noindent +The value returned by this call to @code{split} is three. + +As with input field-splitting, when the value of @var{fieldsep} is +@w{@code{" "}}, leading and trailing whitespace is ignored, and the elements +are separated by runs of whitespace. + +@cindex differences between @code{gawk} and @code{awk} +Also as with input field-splitting, if @var{fieldsep} is the null string, each +individual character in the string is split into its own array element. 
+(This is a @code{gawk}-specific extension.) + +@cindex dark corner +Recent implementations of @code{awk}, including @code{gawk}, allow +the third argument to be a regexp constant (@code{/abc/}), as well as a +string (d.c.). The POSIX standard allows this as well. + +Before splitting the string, @code{split} deletes any previously existing +elements in the array @var{array} (d.c.). + +@item sprintf(@var{format}, @var{expression1},@dots{}) +@findex sprintf +This returns (without printing) the string that @code{printf} would +have printed out with the same arguments +(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}). +For example: + +@example +sprintf("pi = %.2f (approx.)", 22/7) +@end example + +@noindent +returns the string @w{@code{"pi = 3.14 (approx.)"}}. + +@ignore +2e: For sub, gsub, and gensub, either here or in the "how much matches" + section, we need some explanation that it is possible to match the + null string when using closures like *. E.g., + + $ echo abc | awk '{ gsub(/m*/, "X"); print }' + @print{} XaXbXcX + + Although this makes a certain amount of sense, it can be very + suprising. +@end ignore + +@item sub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]}) +@findex sub +The @code{sub} function alters the value of @var{target}. +It searches this value, which is treated as a string, for the +leftmost longest substring matched by the regular expression, @var{regexp}, +extending this match as far as possible. Then the entire string is +changed by replacing the matched text with @var{replacement}. +The modified string becomes the new value of @var{target}. + +This function is peculiar because @var{target} is not simply +used to compute a value, and not just any expression will do: it +must be a variable, field or array element, so that @code{sub} can +store a modified value there. If this argument is omitted, then the +default is to use and alter @code{$0}. 
+ +For example: + +@example +str = "water, water, everywhere" +sub(/at/, "ith", str) +@end example + +@noindent +sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the +leftmost, longest occurrence of @samp{at} with @samp{ith}. + +The @code{sub} function returns the number of substitutions made (either +one or zero). + +If the special character @samp{&} appears in @var{replacement}, it +stands for the precise substring that was matched by @var{regexp}. (If +the regexp can match more than one string, then this precise substring +may vary.) For example: + +@example +awk '@{ sub(/candidate/, "& and his wife"); print @}' +@end example + +@noindent +changes the first occurrence of @samp{candidate} to @samp{candidate +and his wife} on each input line. + +Here is another example: + +@example +awk 'BEGIN @{ + str = "daabaaa" + sub(/a*/, "c&c", str) + print str +@}' +@print{} dcaacbaaa +@end example + +@noindent +This shows how @samp{&} can represent a non-constant string, and also +illustrates the ``leftmost, longest'' rule in regexp matching +(@pxref{Leftmost Longest, ,How Much Text Matches?}). + +The effect of this special character (@samp{&}) can be turned off by putting a +backslash before it in the string. As usual, to insert one backslash in +the string, you must write two backslashes. Therefore, write @samp{\\&} +in a string constant to include a literal @samp{&} in the replacement. +For example, here is how to replace the first @samp{|} on each line with +an @samp{&}: + +@example +awk '@{ sub(/\|/, "\\&"); print @}' +@end example + +@cindex @code{sub}, third argument of +@cindex @code{gsub}, third argument of +@strong{Note:} As mentioned above, the third argument to @code{sub} must +be a variable, field or array reference. +Some versions of @code{awk} allow the third argument to +be an expression which is not an lvalue. 
In such a case, @code{sub} +would still search for the pattern and return zero or one, but the result of +the substitution (if any) would be thrown away because there is no place +to put it. Such versions of @code{awk} accept expressions like +this: + +@example +sub(/USA/, "United States", "the USA and Canada") +@end example + +@noindent +For historical compatibility, @code{gawk} will accept erroneous code, +such as in the above example. However, using any other non-changeable +object as the third parameter will cause a fatal error, and your program +will not run. + +@item gsub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]}) +@findex gsub +This is similar to the @code{sub} function, except @code{gsub} replaces +@emph{all} of the longest, leftmost, @emph{non-overlapping} matching +substrings it can find. The @samp{g} in @code{gsub} stands for +``global,'' which means replace everywhere. For example: + +@example +awk '@{ gsub(/Britain/, "United Kingdom"); print @}' +@end example + +@noindent +replaces all occurrences of the string @samp{Britain} with @samp{United +Kingdom} for all input records. + +The @code{gsub} function returns the number of substitutions made. If +the variable to be searched and altered, @var{target}, is +omitted, then the entire input record, @code{$0}, is used. + +As in @code{sub}, the characters @samp{&} and @samp{\} are special, +and the third argument must be an lvalue. +@end table + +@table @code +@item gensub(@var{regexp}, @var{replacement}, @var{how} @r{[}, @var{target}@r{]}) +@findex gensub +@code{gensub} is a general substitution function. Like @code{sub} and +@code{gsub}, it searches the target string @var{target} for matches of +the regular expression @var{regexp}. Unlike @code{sub} and +@code{gsub}, the modified string is returned as the result of the +function, and the original target string is @emph{not} changed. 
If +@var{how} is a string beginning with @samp{g} or @samp{G}, then it +replaces all matches of @var{regexp} with @var{replacement}. +Otherwise, @var{how} is a number indicating which match of @var{regexp} +to replace. If no @var{target} is supplied, @code{$0} is used instead. + +@code{gensub} provides an additional feature that is not available +in @code{sub} or @code{gsub}: the ability to specify components of +a regexp in the replacement text. This is done by using parentheses +in the regexp to mark the components, and then specifying @samp{\@var{n}} +in the replacement text, where @var{n} is a digit from one to nine. +For example: + +@example +@group +$ gawk ' +> BEGIN @{ +> a = "abc def" +> b = gensub(/(.+) (.+)/, "\\2 \\1", "g", a) +> print b +> @}' +@print{} def abc +@end group +@end example + +@noindent +As described above for @code{sub}, you must type two backslashes in order +to get one into the string. + +In the replacement text, the sequence @samp{\0} represents the entire +matched text, as does the character @samp{&}. + +This example shows how you can use the third argument to control +which match of the regexp should be changed. + +@example +$ echo a b c a b c | +> gawk '@{ print gensub(/a/, "AA", 2) @}' +@print{} a b c AA b c +@end example + +In this case, @code{$0} is used as the default target string. +@code{gensub} returns the new string as its result, which is +passed directly to @code{print} for printing. + +If the @var{how} argument is a string that does not begin with @samp{g} or +@samp{G}, or if it is a number that is less than zero, only one +substitution is performed. + +@cindex differences between @code{gawk} and @code{awk} +@code{gensub} is a @code{gawk} extension; it is not available +in compatibility mode (@pxref{Options, ,Command Line Options}). 
+
+@item substr(@var{string}, @var{start} @r{[}, @var{length}@r{]})
+@findex substr
+This returns a @var{length}-character-long substring of @var{string},
+starting at character number @var{start}. The first character of a
+string is character number one. For example,
+@code{substr("washington", 5, 3)} returns @code{"ing"}.
+
+If @var{length} is not present, this function returns the whole suffix of
+@var{string} that begins at character number @var{start}. For example,
+@code{substr("washington", 5)} returns @code{"ington"}. The whole
+suffix is also returned
+if @var{length} is greater than the number of characters remaining
+in the string, counting from character number @var{start}.
+
+@strong{Note:} The string returned by @code{substr} @emph{cannot} be
+assigned to. Thus, it is a mistake to attempt to change a portion of
+a string, like this:
+
+@example
+string = "abcdef"
+# try to get "abCDEf", won't work
+substr(string, 3, 3) = "CDE"
+@end example
+
+@noindent
+or to use @code{substr} as the third argument of @code{sub} or @code{gsub}:
+
+@example
+gsub(/xyz/, "pdq", substr($0, 5, 20)) # WRONG
+@end example
+
+@cindex case conversion
+@cindex conversion of case
+@item tolower(@var{string})
+@findex tolower
+This returns a copy of @var{string}, with each upper-case character
+in the string replaced with its corresponding lower-case character.
+Non-alphabetic characters are left unchanged. For example,
+@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
+
+@item toupper(@var{string})
+@findex toupper
+This returns a copy of @var{string}, with each lower-case character
+in the string replaced with its corresponding upper-case character.
+Non-alphabetic characters are left unchanged. For example,
+@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
+@end table
+
+@c fakenode --- for prepinfo
+@subheading More About @samp{\} and @samp{&} with @code{sub}, @code{gsub} and @code{gensub}
+
+@cindex escape processing, @code{sub} et. al.
+When using @code{sub}, @code{gsub} or @code{gensub}, and trying to get literal +backslashes and ampersands into the replacement text, you need to remember +that there are several levels of @dfn{escape processing} going on. + +First, there is the @dfn{lexical} level, which is when @code{awk} reads +your program, and builds an internal copy of your program that can +be executed. + +Then there is the run-time level, when @code{awk} actually scans the +replacement string to determine what to generate. + +At both levels, @code{awk} looks for a defined set of characters that +can come after a backslash. At the lexical level, it looks for the +escape sequences listed in @ref{Escape Sequences}. +Thus, for every @samp{\} that @code{awk} will process at the run-time +level, you type two @samp{\}s at the lexical level. +When a character that is not valid for an escape sequence follows the +@samp{\}, Unix @code{awk} and @code{gawk} both simply remove the initial +@samp{\}, and put the following character into the string. Thus, for +example, @code{"a\qb"} is treated as @code{"aqb"}. + +At the run-time level, the various functions handle sequences of +@samp{\} and @samp{&} differently. The situation is (sadly) somewhat complex. + +Historically, the @code{sub} and @code{gsub} functions treated the two +character sequence @samp{\&} specially; this sequence was replaced in +the generated text with a single @samp{&}. Any other @samp{\} within +the @var{replacement} string that did not precede an @samp{&} was passed +through unchanged. To illustrate with a table: + +@c Thank to Karl Berry for help with the TeX stuff. +@tex +\vbox{\bigskip +% This table has lots of &'s and \'s, so unspecialize them. +\catcode`\& = \other \catcode`\\ = \other +% But then we need character for escape and tab. +@catcode`! = 4 +@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr + You type!@code{sub} sees!@code{sub} generates@cr +@hrulefill!@hrulefill!@hrulefill@cr + @code{\&}! 
@code{&}!the matched text@cr + @code{\\&}! @code{\&}!a literal @samp{&}@cr + @code{\\\&}! @code{\&}!a literal @samp{&}@cr +@code{\\\\&}! @code{\\&}!a literal @samp{\&}@cr +@code{\\\\\&}! @code{\\&}!a literal @samp{\&}@cr +@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}@cr + @code{\\q}! @code{\q}!a literal @samp{\q}@cr +} +@bigskip} +@end tex +@ifinfo +@display + You type @code{sub} sees @code{sub} generates + -------- ---------- --------------- + @code{\&} @code{&} the matched text + @code{\\&} @code{\&} a literal @samp{&} + @code{\\\&} @code{\&} a literal @samp{&} + @code{\\\\&} @code{\\&} a literal @samp{\&} + @code{\\\\\&} @code{\\&} a literal @samp{\&} +@code{\\\\\\&} @code{\\\&} a literal @samp{\\&} + @code{\\q} @code{\q} a literal @samp{\q} +@end display +@end ifinfo + +@noindent +This table shows both the lexical level processing, where +an odd number of backslashes becomes an even number at the run time level, +and the run-time processing done by @code{sub}. +(For the sake of simplicity, the rest of the tables below only show the +case of even numbers of @samp{\}s entered at the lexical level.) + +The problem with the historical approach is that there is no way to get +a literal @samp{\} followed by the matched text. + +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +The 1992 POSIX standard attempted to fix this problem. The standard +says that @code{sub} and @code{gsub} look for either a @samp{\} or an @samp{&} +after the @samp{\}. If either one follows a @samp{\}, that character is +output literally. The interpretation of @samp{\} and @samp{&} then becomes +like this: + +@c thanks to Karl Berry for formatting this table +@tex +\vbox{\bigskip +% This table has lots of &'s and \'s, so unspecialize them. +\catcode`\& = \other \catcode`\\ = \other +% But then we need character for escape and tab. +@catcode`! 
= 4 +@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr + You type!@code{sub} sees!@code{sub} generates@cr +@hrulefill!@hrulefill!@hrulefill@cr + @code{&}! @code{&}!the matched text@cr + @code{\\&}! @code{\&}!a literal @samp{&}@cr +@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr +@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr +} +@bigskip} +@end tex +@ifinfo +@display + You type @code{sub} sees @code{sub} generates + -------- ---------- --------------- + @code{&} @code{&} the matched text + @code{\\&} @code{\&} a literal @samp{&} + @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text +@code{\\\\\\&} @code{\\\&} a literal @samp{\&} +@end display +@end ifinfo + +@noindent +This would appear to solve the problem. +Unfortunately, the phrasing of the standard is unusual. It +says, in effect, that @samp{\} turns off the special meaning of any +following character, but that for anything other than @samp{\} and @samp{&}, +such special meaning is undefined. This wording leads to two problems. + +@enumerate +@item +Backslashes must now be doubled in the @var{replacement} string, breaking +historical @code{awk} programs. + +@item +To make sure that an @code{awk} program is portable, @emph{every} character +in the @var{replacement} string must be preceded with a +backslash.@footnote{This consequence was certainly unintended.} +@c I can say that, 'cause I was involved in making this change +@end enumerate + +The POSIX standard is under revision.@footnote{As of @value{UPDATE-MONTH}, +with final approval and publication hopefully sometime in 1997.} +Because of the above problems, proposed text for the revised standard +reverts to rules that correspond more closely to the original existing +practice. The proposed rules have special cases that make it possible +to produce a @samp{\} preceding the matched text. + +@tex +\vbox{\bigskip +% This table has lots of &'s and \'s, so unspecialize them. 
+\catcode`\& = \other \catcode`\\ = \other +% But then we need character for escape and tab. +@catcode`! = 4 +@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr + You type!@code{sub} sees!@code{sub} generates@cr +@hrulefill!@hrulefill!@hrulefill@cr +@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr +@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text@cr + @code{\\&}! @code{\&}!a literal @samp{&}@cr + @code{\\q}! @code{\q}!a literal @samp{\q}@cr +} +@bigskip} +@end tex +@ifinfo +@display + You type @code{sub} sees @code{sub} generates + -------- ---------- --------------- +@code{\\\\\\&} @code{\\\&} a literal @samp{\&} + @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text + @code{\\&} @code{\&} a literal @samp{&} + @code{\\q} @code{\q} a literal @samp{\q} +@end display +@end ifinfo + +In a nutshell, at the run-time level, there are now three special sequences +of characters, @samp{\\\&}, @samp{\\&} and @samp{\&}, whereas historically, +there was only one. However, as in the historical case, any @samp{\} that +is not part of one of these three sequences is not special, and appears +in the output literally. + +@code{gawk} 3.0 follows these proposed POSIX rules for @code{sub} and +@code{gsub}. +@c As much as we think it's a lousy idea. You win some, you lose some. Sigh. +Whether these proposed rules will actually become codified into the +standard is unknown at this point. Subsequent @code{gawk} releases will +track the standard and implement whatever the final version specifies; +this @value{DOCUMENT} will be updated as well. + +The rules for @code{gensub} are considerably simpler. At the run-time +level, whenever @code{gawk} sees a @samp{\}, if the following character +is a digit, then the text that matched the corresponding parenthesized +subexpression is placed in the generated output. Otherwise, +no matter what the character after the @samp{\} is, that character will +appear in the generated text, and the @samp{\} will not. 
+ +@tex +\vbox{\bigskip +% This table has lots of &'s and \'s, so unspecialize them. +\catcode`\& = \other \catcode`\\ = \other +% But then we need character for escape and tab. +@catcode`! = 4 +@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr + You type!@code{gensub} sees!@code{gensub} generates@cr +@hrulefill!@hrulefill!@hrulefill@cr + @code{&}! @code{&}!the matched text@cr + @code{\\&}! @code{\&}!a literal @samp{&}@cr + @code{\\\\}! @code{\\}!a literal @samp{\}@cr + @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr +@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr + @code{\\q}! @code{\q}!a literal @samp{q}@cr +} +@bigskip} +@end tex +@ifinfo +@display + You type @code{gensub} sees @code{gensub} generates + -------- ------------- ------------------ + @code{&} @code{&} the matched text + @code{\\&} @code{\&} a literal @samp{&} + @code{\\\\} @code{\\} a literal @samp{\} + @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text +@code{\\\\\\&} @code{\\\&} a literal @samp{\&} + @code{\\q} @code{\q} a literal @samp{q} +@end display +@end ifinfo + +Because of the complexity of the lexical and run-time level processing, +and the special cases for @code{sub} and @code{gsub}, +we recommend the use of @code{gawk} and @code{gensub} when you have +to do substitutions. + +@node I/O Functions, Time Functions, String Functions, Built-in +@section Built-in Functions for Input/Output + +The following functions are related to Input/Output (I/O). +Optional parameters are enclosed in square brackets (``['' and ``]''). + +@table @code +@item close(@var{filename}) +@findex close +Close the file @var{filename}, for input or output. The argument may +alternatively be a shell command that was used for redirecting to or +from a pipe; then the pipe is closed. +@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}, +for more information. 
+ +@item fflush(@r{[}@var{filename}@r{]}) +@findex fflush +@cindex portability issues +@cindex flushing buffers +@cindex buffers, flushing +@cindex buffering output +@cindex output, buffering +Flush any buffered output associated with @var{filename}, which is either a +file opened for writing, or a shell command for redirecting output to +a pipe. + +Many utility programs will @dfn{buffer} their output; they save information +to be written to a disk file or terminal in memory, until there is enough +for it to be worthwhile to send the data to the output device. +This is often more efficient than writing +every little bit of information as soon as it is ready. However, sometimes +it is necessary to force a program to @dfn{flush} its buffers; that is, +write the information to its destination, even if a buffer is not full. +This is the purpose of the @code{fflush} function; @code{gawk} too +buffers its output, and the @code{fflush} function can be used to force +@code{gawk} to flush its buffers. + +@code{fflush} is a recent (1994) addition to the Bell Labs research +version of @code{awk}; it is not part of the POSIX standard, and will +not be available if @samp{--posix} has been specified on the command +line (@pxref{Options, ,Command Line Options}). + +@code{gawk} extends the @code{fflush} function in two ways. The first +is to allow no argument at all. In this case, the buffer for the +standard output is flushed. The second way is to allow the null string +(@w{@code{""}}) as the argument. In this case, the buffers for +@emph{all} open output files and pipes are flushed. + +@code{fflush} returns zero if the buffer was successfully flushed, +and nonzero otherwise. + +@item system(@var{command}) +@findex system +@cindex interaction, @code{awk} and other programs +The system function allows the user to execute operating system commands +and then return to the @code{awk} program. The @code{system} function +executes the command given by the string @var{command}. 
It returns, as +its value, the status returned by the command that was executed. + +For example, if the following fragment of code is put in your @code{awk} +program: + +@example +END @{ + system("date | mail -s 'awk run done' root") +@} +@end example + +@noindent +the system administrator will be sent mail when the @code{awk} program +finishes processing input and begins its end-of-input processing. + +Note that redirecting @code{print} or @code{printf} into a pipe is often +enough to accomplish your task. However, if your @code{awk} +program is interactive, @code{system} is useful for cranking up large +self-contained programs, such as a shell or an editor. + +Some operating systems cannot implement the @code{system} function. +@code{system} causes a fatal error if it is not supported. +@end table + +@c fakenode --- for prepinfo +@subheading Interactive vs. Non-Interactive Buffering +@cindex buffering, interactive vs. non-interactive +@cindex buffering, non-interactive vs. interactive +@cindex interactive buffering vs. non-interactive +@cindex non-interactive buffering vs. interactive + +As a side point, buffering issues can be even more confusing depending +upon whether or not your program is @dfn{interactive}, i.e., communicating +with a user sitting at a keyboard.@footnote{A program is interactive +if the standard output is connected +to a terminal device.} + +Interactive programs generally @dfn{line buffer} their output; they +write out every line. Non-interactive programs wait until they have +a full buffer, which may be many lines of output. + +@c Thanks to Walter.Mecky@dresdnerbank.de for this example, and for +@c motivating me to write this section. +Here is an example of the difference. + +@example +$ awk '@{ print $1 + $2 @}' +1 1 +@print{} 2 +2 3 +@print{} 5 +@kbd{Control-d} +@end example + +@noindent +Each line of output is printed immediately. Compare that behavior +with this example. 
+ +@example +$ awk '@{ print $1 + $2 @}' | cat +1 1 +2 3 +@kbd{Control-d} +@print{} 2 +@print{} 5 +@end example + +@noindent +Here, no output is printed until after the @kbd{Control-d} is typed, since +it is all buffered, and sent down the pipe to @code{cat} in one shot. + +@c fakenode --- for prepinfo +@subheading Controlling Output Buffering with @code{system} +@cindex flushing buffers +@cindex buffers, flushing +@cindex buffering output +@cindex output, buffering + +The @code{fflush} function provides explicit control over output buffering for +individual files and pipes. However, its use is not portable to many other +@code{awk} implementations. An alternative method to flush output +buffers is by calling @code{system} with a null string as its argument: + +@example +system("") # flush output +@end example + +@noindent +@code{gawk} treats this use of the @code{system} function as a special +case, and is smart enough not to run a shell (or other command +interpreter) with the empty command. Therefore, with @code{gawk}, this +idiom is not only useful, it is efficient. While this method should work +with other @code{awk} implementations, it will not necessarily avoid +starting an unnecessary shell. (Other implementations may only +flush the buffer associated with the standard output, and not necessarily +all buffered output.) + +If you think about what a programmer expects, it makes sense that +@code{system} should flush any pending output. The following program: + +@example +BEGIN @{ + print "first print" + system("echo system echo") + print "second print" +@} +@end example + +@noindent +must print + +@example +first print +system echo +second print +@end example + +@noindent +and not + +@example +system echo +first print +second print +@end example + +If @code{awk} did not flush its buffers before calling @code{system}, the +latter (undesirable) output is what you would see. 
+ +@node Time Functions, , I/O Functions, Built-in +@section Functions for Dealing with Time Stamps + +@cindex timestamps +@cindex time of day +A common use for @code{awk} programs is the processing of log files +containing time stamp information, indicating when a +particular log record was written. Many programs log their time stamp +in the form returned by the @code{time} system call, which is the +number of seconds since a particular epoch. On POSIX systems, +it is the number of seconds since Midnight, January 1, 1970, UTC. + +In order to make it easier to process such log files, and to produce +useful reports, @code{gawk} provides two functions for working with time +stamps. Both of these are @code{gawk} extensions; they are not specified +in the POSIX standard, nor are they in any other known version +of @code{awk}. + +Optional parameters are enclosed in square brackets (``['' and ``]''). + +@table @code +@item systime() +@findex systime +This function returns the current time as the number of seconds since +the system epoch. On POSIX systems, this is the number of seconds +since Midnight, January 1, 1970, UTC. It may be a different number on +other systems. + +@item strftime(@r{[}@var{format} @r{[}, @var{timestamp}@r{]]}) +@findex strftime +This function returns a string. It is similar to the function of the +same name in ANSI C. The time specified by @var{timestamp} is used to +produce a string, based on the contents of the @var{format} string. +The @var{timestamp} is in the same format as the value returned by the +@code{systime} function. If no @var{timestamp} argument is supplied, +@code{gawk} will use the current time of day as the time stamp. +If no @var{format} argument is supplied, @code{strftime} uses +@code{@w{"%a %b %d %H:%M:%S %Z %Y"}}. This format string produces +output (almost) equivalent to that of the @code{date} utility. +(Versions of @code{gawk} prior to 3.0 require the @var{format} argument.) 
+@end table + +The @code{systime} function allows you to compare a time stamp from a +log file with the current time of day. In particular, it is easy to +determine how long ago a particular record was logged. It also allows +you to produce log records using the ``seconds since the epoch'' format. + +The @code{strftime} function allows you to easily turn a time stamp +into human-readable information. It is similar in nature to the @code{sprintf} +function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}), +in that it copies non-format specification characters verbatim to the +returned string, while substituting date and time values for format +specifications in the @var{format} string. + +@code{strftime} is guaranteed by the ANSI C standard to support +the following date format specifications: + +@table @code +@item %a +The locale's abbreviated weekday name. + +@item %A +The locale's full weekday name. + +@item %b +The locale's abbreviated month name. + +@item %B +The locale's full month name. + +@item %c +The locale's ``appropriate'' date and time representation. + +@item %d +The day of the month as a decimal number (01--31). + +@item %H +The hour (24-hour clock) as a decimal number (00--23). + +@item %I +The hour (12-hour clock) as a decimal number (01--12). + +@item %j +The day of the year as a decimal number (001--366). + +@item %m +The month as a decimal number (01--12). + +@item %M +The minute as a decimal number (00--59). + +@item %p +The locale's equivalent of the AM/PM designations associated +with a 12-hour clock. + +@item %S +The second as a decimal number (00--60).@footnote{Occasionally there are +minutes in a year with a leap second, which is why the +seconds can go up to 60.} + +@item %U +The week number of the year (the first Sunday as the first day of week one) +as a decimal number (00--53). + +@item %w +The weekday as a decimal number (0--6). Sunday is day zero. 
+ +@item %W +The week number of the year (the first Monday as the first day of week one) +as a decimal number (00--53). + +@item %x +The locale's ``appropriate'' date representation. + +@item %X +The locale's ``appropriate'' time representation. + +@item %y +The year without century as a decimal number (00--99). + +@item %Y +The year with century as a decimal number (e.g., 1995). + +@item %Z +The time zone name or abbreviation, or no characters if +no time zone is determinable. + +@item %% +A literal @samp{%}. +@end table + +If a conversion specifier is not one of the above, the behavior is +undefined.@footnote{This is because ANSI C leaves the +behavior of the C version of @code{strftime} undefined, and @code{gawk} +will use the system's version of @code{strftime} if it's there. +Typically, the conversion specifier will either not appear in the +returned string, or it will appear literally.} + +@cindex locale, definition of +Informally, a @dfn{locale} is the geographic place in which a program +is meant to run. For example, a common way to abbreviate the date +September 4, 1991 in the United States would be ``9/4/91''. +In many countries in Europe, however, it would be abbreviated ``4.9.91''. +Thus, the @samp{%x} specification in a @code{"US"} locale might produce +@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce +@samp{4.9.91}. The ANSI C standard defines a default @code{"C"} +locale, which is an environment that is typical of what most C programmers +are used to. + +A public-domain C version of @code{strftime} is supplied with @code{gawk} +for systems that are not yet fully ANSI-compliant. If that version is +used to compile @code{gawk} (@pxref{Installation, ,Installing @code{gawk}}), +then the following additional format specifications are available: + +@table @code +@item %D +Equivalent to specifying @samp{%m/%d/%y}. + +@item %e +The day of the month, padded with a space if it is only one digit. + +@item %h +Equivalent to @samp{%b}, above. 
+ +@item %n +A newline character (ASCII LF). + +@item %r +Equivalent to specifying @samp{%I:%M:%S %p}. + +@item %R +Equivalent to specifying @samp{%H:%M}. + +@item %T +Equivalent to specifying @samp{%H:%M:%S}. + +@item %t +A tab character. + +@item %k +The hour (24-hour clock) as a decimal number (0-23). +Single digit numbers are padded with a space. + +@item %l +The hour (12-hour clock) as a decimal number (1-12). +Single digit numbers are padded with a space. + +@item %C +The century, as a number between 00 and 99. + +@item %u +The weekday as a decimal number +[1 (Monday)--7]. + +@cindex ISO 8601 +@item %V +The week number of the year (the first Monday as the first +day of week one) as a decimal number (01--53). +The method for determining the week number is as specified by ISO 8601 +(to wit: if the week containing January 1 has four or more days in the +new year, then it is week one, otherwise it is week 53 of the previous year +and the next week is week one). + +@item %G +The year with century of the ISO week number, as a decimal number. + +For example, January 1, 1993, is in week 53 of 1992. Thus, the year +of its ISO week number is 1992, even though its year is 1993. +Similarly, December 31, 1973, is in week 1 of 1974. Thus, the year +of its ISO week number is 1974, even though its year is 1973. + +@item %g +The year without century of the ISO week number, as a decimal number (00--99). + +@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI +@itemx %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy +These are ``alternate representations'' for the specifications +that use only the second letter (@samp{%c}, @samp{%C}, and so on). +They are recognized, but their normal representations are +used.@footnote{If you don't understand any of this, don't worry about +it; these facilities are meant to make it easier to ``internationalize'' +programs.} +(These facilitate compliance with the POSIX @code{date} utility.) + +@item %v +The date in VMS format (e.g., 20-JUN-1991). 
+ +@cindex RFC-822 +@cindex RFC-1036 +@item %z +The timezone offset in a +HHMM format (e.g., the format necessary to +produce RFC-822/RFC-1036 date headers). +@end table + +This example is an @code{awk} implementation of the POSIX +@code{date} utility. Normally, the @code{date} utility prints the +current date and time of day in a well known format. However, if you +provide an argument to it that begins with a @samp{+}, @code{date} +will copy non-format specifier characters to the standard output, and +will interpret the current time according to the format specifiers in +the string. For example: + +@example +$ date '+Today is %A, %B %d, %Y.' +@print{} Today is Thursday, July 11, 1991. +@end example + +Here is the @code{gawk} version of the @code{date} utility. +It has a shell ``wrapper'', to handle the @samp{-u} option, +which requires that @code{date} run as if the time zone +was set to UTC. + +@example +@group +#! /bin/sh +# +# date --- approximate the P1003.2 'date' command + +case $1 in +-u) TZ=GMT0 # use UTC + export TZ + shift ;; +esac +@end group + +@group +gawk 'BEGIN @{ + format = "%a %b %d %H:%M:%S %Z %Y" + exitval = 0 +@end group + +@group + if (ARGC > 2) + exitval = 1 + else if (ARGC == 2) @{ + format = ARGV[1] + if (format ~ /^\+/) + format = substr(format, 2) # remove leading + + @} + print strftime(format) + exit exitval +@}' "$@@" +@end group +@end example + +@node User-defined, Invoking Gawk, Built-in, Top +@chapter User-defined Functions + +@cindex user-defined functions +@cindex functions, user-defined +Complicated @code{awk} programs can often be simplified by defining +your own functions. User-defined functions can be called just like +built-in ones (@pxref{Function Calls}), but it is up to you to define +them---to tell @code{awk} what they should do. + +@menu +* Definition Syntax:: How to write definitions and what they mean. +* Function Example:: An example function definition and what it + does. 
+* Function Caveats:: Things to watch out for. +* Return Statement:: Specifying the value a function returns. +@end menu + +@node Definition Syntax, Function Example, User-defined, User-defined +@section Function Definition Syntax +@cindex defining functions +@cindex function definition + +Definitions of functions can appear anywhere between the rules of an +@code{awk} program. Thus, the general form of an @code{awk} program is +extended to include sequences of rules @emph{and} user-defined function +definitions. +There is no need in @code{awk} to put the definition of a function +before all uses of the function. This is because @code{awk} reads the +entire program before starting to execute any of it. + +The definition of a function named @var{name} looks like this: + +@example +function @var{name}(@var{parameter-list}) +@{ + @var{body-of-function} +@} +@end example + +@cindex names, use of +@cindex namespaces +@noindent +@var{name} is the name of the function to be defined. A valid function +name is like a valid variable name: a sequence of letters, digits and +underscores, not starting with a digit. +Within a single @code{awk} program, any particular name can only be +used as a variable, array or function. + +@var{parameter-list} is a list of the function's arguments and local +variable names, separated by commas. When the function is called, +the argument names are used to hold the argument values given in +the call. The local variables are initialized to the empty string. +A function cannot have two parameters with the same name. + +The @var{body-of-function} consists of @code{awk} statements. It is the +most important part of the definition, because it says what the function +should actually @emph{do}. The argument names exist to give the body a +way to talk about the arguments; local variables, to give the body +places to keep temporary values. 
+ +Argument names are not distinguished syntactically from local variable +names; instead, the number of arguments supplied when the function is +called determines how many argument variables there are. Thus, if three +argument values are given, the first three names in @var{parameter-list} +are arguments, and the rest are local variables. + +It follows that if the number of arguments is not the same in all calls +to the function, some of the names in @var{parameter-list} may be +arguments on some occasions and local variables on others. Another +way to think of this is that omitted arguments default to the +null string. + +Usually when you write a function you know how many names you intend to +use for arguments and how many you intend to use as local variables. It is +conventional to place some extra space between the arguments and +the local variables, to document how your function is supposed to be used. + +@cindex variable shadowing +During execution of the function body, the arguments and local variable +values hide or @dfn{shadow} any variables of the same names used in the +rest of the program. The shadowed variables are not accessible in the +function definition, because there is no way to name them while their +names have been taken away for the local variables. All other variables +used in the @code{awk} program can be referenced or set normally in the +function's body. + +The arguments and local variables last only as long as the function body +is executing. Once the body finishes, you can once again access the +variables that were shadowed while the function was running. + +@cindex recursive function +@cindex function, recursive +The function body can contain expressions which call functions. They +can even call this function, either directly or by way of another +function. When this happens, we say the function is @dfn{recursive}. 
+ +@cindex @code{awk} language, POSIX version +@cindex POSIX @code{awk} +In many @code{awk} implementations, including @code{gawk}, +the keyword @code{function} may be +abbreviated @code{func}. However, POSIX only specifies the use of +the keyword @code{function}. This actually has some practical implications. +If @code{gawk} is in POSIX-compatibility mode +(@pxref{Options, ,Command Line Options}), then the following +statement will @emph{not} define a function: + +@example +func foo() @{ a = sqrt($1) ; print a @} +@end example + +@noindent +Instead it defines a rule that, for each record, concatenates the value +of the variable @samp{func} with the return value of the function @samp{foo}. +If the resulting string is non-null, the action is executed. +This is probably not what was desired. (@code{awk} accepts this input as +syntactically valid, since functions may be used before they are defined +in @code{awk} programs.) + +@cindex portability issues +To ensure that your @code{awk} programs are portable, always use the +keyword @code{function} when defining a function. + +@node Function Example, Function Caveats, Definition Syntax, User-defined +@section Function Definition Examples + +Here is an example of a user-defined function, called @code{myprint}, that +takes a number and prints it in a specific format. + +@example +function myprint(num) +@{ + printf "%6.3g\n", num +@} +@end example + +@noindent +To illustrate, here is an @code{awk} rule which uses our @code{myprint} +function: + +@example +$3 > 0 @{ myprint($3) @} +@end example + +@noindent +This program prints, in our special format, all the third fields that +contain a positive number in our input. Therefore, when given: + +@example +@group + 1.2 3.4 5.6 7.8 + 9.10 11.12 -13.14 15.16 +17.18 19.20 21.22 23.24 +@end group +@end example + +@noindent +this program, using our function to format the results, prints: + +@example + 5.6 + 21.2 +@end example + +This function deletes all the elements in an array. 
+ +@example +function delarray(a, i) +@{ + for (i in a) + delete a[i] +@} +@end example + +When working with arrays, it is often necessary to delete all the elements +in an array and start over with a new list of elements +(@pxref{Delete, ,The @code{delete} Statement}). +Instead of having +to repeat this loop everywhere in your program that you need to clear out +an array, your program can just call @code{delarray}. + +Here is an example of a recursive function. It takes a string +as an input parameter, and returns the string in backwards order. + +@example +function rev(str, start) +@{ + if (start == 0) + return "" + + return (substr(str, start, 1) rev(str, start - 1)) +@} +@end example + +If this function is in a file named @file{rev.awk}, we can test it +this way: + +@example +$ echo "Don't Panic!" | +> gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk +@print{} !cinaP t'noD +@end example + +Here is an example that uses the built-in function @code{strftime}. +(@xref{Time Functions, ,Functions for Dealing with Time Stamps}, +for more information on @code{strftime}.) +The C @code{ctime} function takes a timestamp and returns it in a string, +formatted in a well known fashion. Here is an @code{awk} version: + +@example +@c file eg/lib/ctime.awk +@group +# ctime.awk +# +# awk version of C ctime(3) function + +function ctime(ts, format) +@{ + format = "%a %b %d %H:%M:%S %Z %Y" + if (ts == 0) + ts = systime() # use current time as default + return strftime(format, ts) +@} +@c endfile +@end group +@end example + +@node Function Caveats, Return Statement, Function Example, User-defined +@section Calling User-defined Functions + +@cindex call by value +@cindex call by reference +@cindex calling a function +@cindex function call +@dfn{Calling a function} means causing the function to run and do its job. +A function call is an expression, and its value is the value returned by +the function. 
+ +A function call consists of the function name followed by the arguments +in parentheses. What you write in the call for the arguments are +@code{awk} expressions; each time the call is executed, these +expressions are evaluated, and the values are the actual arguments. For +example, here is a call to @code{foo} with three arguments (the first +being a string concatenation): + +@example +foo(x y, "lose", 4 * z) +@end example + +@strong{Caution:} whitespace characters (spaces and tabs) are not allowed +between the function name and the open-parenthesis of the argument list. +If you write whitespace by mistake, @code{awk} might think that you mean +to concatenate a variable with an expression in parentheses. However, it +notices that you used a function name and not a variable name, and reports +an error. + +@cindex call by value +When a function is called, it is given a @emph{copy} of the values of +its arguments. This is known as @dfn{call by value}. The caller may use +a variable as the expression for the argument, but the called function +does not know this: it only knows what value the argument had. For +example, if you write this code: + +@example +foo = "bar" +z = myfunc(foo) +@end example + +@noindent +then you should not think of the argument to @code{myfunc} as being +``the variable @code{foo}.'' Instead, think of the argument as the +string value, @code{"bar"}. + +If the function @code{myfunc} alters the values of its local variables, +this has no effect on any other variables. Thus, if @code{myfunc} +does this: + +@example +@group +function myfunc(str) +@{ + print str + str = "zzz" + print str +@} +@end group +@end example + +@noindent +to change its first argument variable @code{str}, this @emph{does not} +change the value of @code{foo} in the caller. The role of @code{foo} in +calling @code{myfunc} ended when its value, @code{"bar"}, was computed. 
+If @code{str} also exists outside of @code{myfunc}, the function body
+cannot alter this outer value, because it is shadowed during the
+execution of @code{myfunc} and cannot be seen or changed from there.
+
+@cindex call by reference
+However, when arrays are the parameters to functions, they are @emph{not}
+copied. Instead, the array itself is made available for direct manipulation
+by the function. This is usually called @dfn{call by reference}.
+Changes made to an array parameter inside the body of a function @emph{are}
+visible outside that function.
+@ifinfo
+This can be @strong{very} dangerous if you do not watch what you are
+doing. For example:
+@end ifinfo
+@iftex
+@emph{This can be very dangerous if you do not watch what you are
+doing.} For example:
+@end iftex
+
+@example
+function changeit(array, ind, nvalue)
+@{
+ array[ind] = nvalue
+@}
+
+BEGIN @{
+ a[1] = 1; a[2] = 2; a[3] = 3
+ changeit(a, 2, "two")
+ printf "a[1] = %s, a[2] = %s, a[3] = %s\n",
+ a[1], a[2], a[3]
+@}
+@end example
+
+@noindent
+This program prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
+@code{changeit} stores @code{"two"} in the second element of @code{a}.
+
+@cindex undefined functions
+@cindex functions, undefined
+Some @code{awk} implementations allow you to call a function that
+has not been defined, and only report a problem at run-time when the
+program actually tries to call the function. For example:
+
+@example
+@group
+BEGIN @{
+ if (0)
+ foo()
+ else
+ bar()
+@}
+function bar() @{ @dots{} @}
+# note that `foo' is not defined
+@end group
+@end example
+
+@noindent
+Since the @samp{if} statement will never be true, it is not really a
+problem that @code{foo} has not been defined. Usually though, it is a
+problem if a program calls an undefined function.
+
+@ignore
+At one point, I had gawk dying on this, but later decided that this might
+break old programs and/or test suites. 
+@end ignore + +If @samp{--lint} has been specified +(@pxref{Options, ,Command Line Options}), +@code{gawk} will report about calls to undefined functions. + +Some @code{awk} implementations generate a run-time +error if you use the @code{next} statement +(@pxref{Next Statement, , The @code{next} Statement}) +inside a user-defined function. +@code{gawk} does not have this problem. + +@node Return Statement, , Function Caveats, User-defined +@section The @code{return} Statement +@cindex @code{return} statement + +The body of a user-defined function can contain a @code{return} statement. +This statement returns control to the rest of the @code{awk} program. It +can also be used to return a value for use in the rest of the @code{awk} +program. It looks like this: + +@example +return @r{[}@var{expression}@r{]} +@end example + +The @var{expression} part is optional. If it is omitted, then the returned +value is undefined and, therefore, unpredictable. + +A @code{return} statement with no value expression is assumed at the end of +every function definition. So if control reaches the end of the function +body, then the function returns an unpredictable value. @code{awk} +will @emph{not} warn you if you use the return value of such a function. + +Sometimes, you want to write a function for what it does, not for +what it returns. Such a function corresponds to a @code{void} function +in C or to a @code{procedure} in Pascal. Thus, it may be appropriate to not +return any value; you should simply bear in mind that if you use the return +value of such a function, you do so at your own risk. + +Here is an example of a user-defined function that returns a value +for the largest number among the elements of an array: + +@example +@group +function maxelt(vec, i, ret) +@{ + for (i in vec) @{ + if (ret == "" || vec[i] > ret) + ret = vec[i] + @} + return ret +@} +@end group +@end example + +@noindent +You call @code{maxelt} with one argument, which is an array name. 
The local +variables @code{i} and @code{ret} are not intended to be arguments; +while there is nothing to stop you from passing two or three arguments +to @code{maxelt}, the results would be strange. The extra space before +@code{i} in the function parameter list indicates that @code{i} and +@code{ret} are not supposed to be arguments. This is a convention that +you should follow when you define functions. + +Here is a program that uses our @code{maxelt} function. It loads an +array, calls @code{maxelt}, and then reports the maximum number in that +array: + +@example +@group +awk ' +function maxelt(vec, i, ret) +@{ + for (i in vec) @{ + if (ret == "" || vec[i] > ret) + ret = vec[i] + @} + return ret +@} +@end group + +@group +# Load all fields of each record into nums. +@{ + for(i = 1; i <= NF; i++) + nums[NR, i] = $i +@} + +END @{ + print maxelt(nums) +@}' +@end group +@end example + +Given the following input: + +@example +@group + 1 5 23 8 16 +44 3 5 2 8 26 +256 291 1396 2962 100 +-6 467 998 1101 +99385 11 0 225 +@end group +@end example + +@noindent +our program tells us (predictably) that @code{99385} is the largest number +in our array. + +@node Invoking Gawk, Library Functions, User-defined, Top +@chapter Running @code{awk} +@cindex command line +@cindex invocation of @code{gawk} +@cindex arguments, command line +@cindex options, command line +@cindex long options +@cindex options, long + +There are two ways to run @code{awk}: with an explicit program, or with +one or more program files. Here are templates for both of them; items +enclosed in @samp{@r{[}@dots{}@r{]}} in these templates are optional. + +Besides traditional one-letter POSIX-style options, @code{gawk} also +supports GNU long options. 
+
+@example
+awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
+awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+@cindex empty program
+@cindex dark corner
+It is possible to invoke @code{awk} with an empty program:
+
+@example
+$ awk '' datafile1 datafile2
+@end example
+
+@noindent
+Doing so makes little sense though; @code{awk} will simply exit
+silently when given an empty program (d.c.). If @samp{--lint} has
+been specified on the command line, @code{gawk} will issue a
+warning that the program is empty.
+
+@menu
+* Options:: Command line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+* Known Bugs:: Known Bugs in @code{gawk}.
+@end menu
+
+@node Options, Other Arguments, Invoking Gawk, Invoking Gawk
+@section Command Line Options
+
+Options begin with a dash, and consist of a single character.
+GNU style long options consist of two dashes and a keyword.
+The keyword can be abbreviated, as long as the abbreviation allows the option
+to be uniquely identified. If the option takes an argument, then the
+keyword is either immediately followed by an equals sign (@samp{=}) and the
+argument's value, or the keyword and the argument's value are separated
+by whitespace. For brevity, the discussion below only refers to the
+traditional short options; however the long and short options are
+interchangeable in all contexts.
+
+Each long option for @code{gawk} has a corresponding
+POSIX-style option. The options and their meanings are as follows:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator @var{fs}
+@cindex @code{-F} option
+@cindex @code{--field-separator} option
+Sets the @code{FS} variable to @var{fs}
+(@pxref{Field Separators, ,Specifying How Fields are Separated}). 
+ +@item -f @var{source-file} +@itemx --file @var{source-file} +@cindex @code{-f} option +@cindex @code{--file} option +Indicates that the @code{awk} program is to be found in @var{source-file} +instead of in the first non-option argument. + +@item -v @var{var}=@var{val} +@itemx --assign @var{var}=@var{val} +@cindex @code{-v} option +@cindex @code{--assign} option +Sets the variable @var{var} to the value @var{val} @strong{before} +execution of the program begins. Such variable values are available +inside the @code{BEGIN} rule +(@pxref{Other Arguments, ,Other Command Line Arguments}). + +The @samp{-v} option can only set one variable, but you can use +it more than once, setting another variable each time, like this: +@samp{awk @w{-v foo=1} @w{-v bar=2} @dots{}}. + +@item -mf @var{NNN} +@itemx -mr @var{NNN} +Set various memory limits to the value @var{NNN}. The @samp{f} flag sets +the maximum number of fields, and the @samp{r} flag sets the maximum +record size. These two flags and the @samp{-m} option are from the +Bell Labs research version of Unix @code{awk}. They are provided +for compatibility, but otherwise ignored by +@code{gawk}, since @code{gawk} has no predefined limits. + +@item -W @var{gawk-opt} +@cindex @code{-W} option +Following the POSIX standard, options that are implementation +specific are supplied as arguments to the @samp{-W} option. These options +also have corresponding GNU style long options. +See below. + +@item -- +Signals the end of the command line options. The following arguments +are not treated as options even if they begin with @samp{-}. This +interpretation of @samp{--} follows the POSIX argument parsing +conventions. + +This is useful if you have file names that start with @samp{-}, +or in shell scripts, if you have file names that will be specified +by the user which could start with @samp{-}. 
+@end table + +The following @code{gawk}-specific options are available: + +@table @code +@item -W traditional +@itemx -W compat +@itemx --traditional +@itemx --compat +@cindex @code{--compat} option +@cindex @code{--traditional} option +@cindex compatibility mode +Specifies @dfn{compatibility mode}, in which the GNU extensions to +the @code{awk} language are disabled, so that @code{gawk} behaves just +like the Bell Labs research version of Unix @code{awk}. +@samp{--traditional} is the preferred form of this option. +@xref{POSIX/GNU, ,Extensions in @code{gawk} Not in POSIX @code{awk}}, +which summarizes the extensions. Also see +@ref{Compatibility Mode, ,Downward Compatibility and Debugging}. + +@item -W copyleft +@itemx -W copyright +@itemx --copyleft +@itemx --copyright +@cindex @code{--copyleft} option +@cindex @code{--copyright} option +Print the short version of the General Public License, and then exit. +This option may disappear in a future version of @code{gawk}. + +@item -W help +@itemx -W usage +@itemx --help +@itemx --usage +@cindex @code{--help} option +@cindex @code{--usage} option +Print a ``usage'' message summarizing the short and long style options +that @code{gawk} accepts, and then exit. + +@item -W lint +@itemx --lint +@cindex @code{--lint} option +Warn about constructs that are dubious or non-portable to +other @code{awk} implementations. +Some warnings are issued when @code{gawk} first reads your program. Others +are issued at run-time, as your program executes. + +@item -W lint-old +@itemx --lint-old +@cindex @code{--lint-old} option +Warn about constructs that are not available in +the original Version 7 Unix version of @code{awk} +(@pxref{V7/SVR3.1, , Major Changes between V7 and SVR3.1}). + +@item -W posix +@itemx --posix +@cindex @code{--posix} option +@cindex POSIX mode +Operate in strict POSIX mode. 
This disables all @code{gawk} +extensions (just like @samp{--traditional}), and adds the following additional +restrictions: + +@c IMPORTANT! Keep this list in sync with the one in node POSIX + +@itemize @bullet +@item +@code{\x} escape sequences are not recognized +(@pxref{Escape Sequences}). + +@item +Newlines do not act as whitespace to separate fields when @code{FS} is +equal to a single space. + +@item +The synonym @code{func} for the keyword @code{function} is not +recognized (@pxref{Definition Syntax, ,Function Definition Syntax}). + +@item +The operators @samp{**} and @samp{**=} cannot be used in +place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators}, +and also @pxref{Assignment Ops, ,Assignment Expressions}). + +@item +Specifying @samp{-Ft} on the command line does not set the value +of @code{FS} to be a single tab character +(@pxref{Field Separators, ,Specifying How Fields are Separated}). + +@item +The @code{fflush} built-in function is not supported +(@pxref{I/O Functions, , Built-in Functions for Input/Output}). +@end itemize + +If you supply both @samp{--traditional} and @samp{--posix} on the +command line, @samp{--posix} will take precedence. @code{gawk} +will also issue a warning if both options are supplied. + +@item -W re-interval +@itemx --re-interval +Allow interval expressions +(@pxref{Regexp Operators, , Regular Expression Operators}), +in regexps. +Because interval expressions were traditionally not available in @code{awk}, +@code{gawk} does not provide them by default. This prevents old @code{awk} +programs from breaking. + +@item -W source @var{program-text} +@itemx --source @var{program-text} +@cindex @code{--source} option +Program source code is taken from the @var{program-text}. This option +allows you to mix source code in files with source +code that you enter on the command line. 
This is particularly useful +when you have library functions that you wish to use from your command line +programs (@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}). + +@item -W version +@itemx --version +@cindex @code{--version} option +Prints version information for this particular copy of @code{gawk}. +This allows you to determine if your copy of @code{gawk} is up to date +with respect to whatever the Free Software Foundation is currently +distributing. +It is also useful for bug reports +(@pxref{Bugs, , Reporting Problems and Bugs}). +@end table + +Any other options are flagged as invalid with a warning message, but +are otherwise ignored. + +In compatibility mode, as a special case, if the value of @var{fs} supplied +to the @samp{-F} option is @samp{t}, then @code{FS} is set to the tab +character (@code{"\t"}). This is only true for @samp{--traditional}, and not +for @samp{--posix} +(@pxref{Field Separators, ,Specifying How Fields are Separated}). + +The @samp{-f} option may be used more than once on the command line. +If it is, @code{awk} reads its program source from all of the named files, as +if they had been concatenated together into one big file. This is +useful for creating libraries of @code{awk} functions. Useful functions +can be written once, and then retrieved from a standard place, instead +of having to be included into each individual program. + +You can type in a program at the terminal and still use library functions, +by specifying @samp{-f /dev/tty}. @code{awk} will read a file from the terminal +to use as part of the @code{awk} program. After typing your program, +type @kbd{Control-d} (the end-of-file character) to terminate it. +(You may also use @samp{-f -} to read program source from the standard +input, but then you will not be able to also use the standard input as a +source of data.) 
+ +Because it is clumsy using the standard @code{awk} mechanisms to mix source +file and command line @code{awk} programs, @code{gawk} provides the +@samp{--source} option. This does not require you to pre-empt the standard +input for your source code, and allows you to easily mix command line +and library source code +(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}). + +If no @samp{-f} or @samp{--source} option is specified, then @code{gawk} +will use the first non-option command line argument as the text of the +program source code. + +@cindex @code{POSIXLY_CORRECT} environment variable +@cindex environment variable, @code{POSIXLY_CORRECT} +If the environment variable @code{POSIXLY_CORRECT} exists, +then @code{gawk} will behave in strict POSIX mode, exactly as if +you had supplied the @samp{--posix} command line option. +Many GNU programs look for this environment variable to turn on +strict POSIX mode. If you supply @samp{--lint} on the command line, +and @code{gawk} turns on POSIX mode because of @code{POSIXLY_CORRECT}, +then it will print a warning message indicating that POSIX +mode is in effect. + +You would typically set this variable in your shell's startup file. +For a Bourne compatible shell (such as Bash), you would add these +lines to the @file{.profile} file in your home directory. + +@example +@group +POSIXLY_CORRECT=true +export POSIXLY_CORRECT +@end group +@end example + +For a @code{csh} compatible shell,@footnote{Not recommended.} +you would add this line to the @file{.login} file in your home directory. + +@example +setenv POSIXLY_CORRECT true +@end example + +@node Other Arguments, AWKPATH Variable, Options, Invoking Gawk +@section Other Command Line Arguments + +Any additional arguments on the command line are normally treated as +input files to be processed in the order specified. 
However, an +argument that has the form @code{@var{var}=@var{value}}, assigns +the value @var{value} to the variable @var{var}---it does not specify a +file at all. + +@vindex ARGIND +@vindex ARGV +All these arguments are made available to your @code{awk} program in the +@code{ARGV} array (@pxref{Built-in Variables}). Command line options +and the program text (if present) are omitted from @code{ARGV}. +All other arguments, including variable assignments, are +included. As each element of @code{ARGV} is processed, @code{gawk} +sets the variable @code{ARGIND} to the index in @code{ARGV} of the +current element. + +The distinction between file name arguments and variable-assignment +arguments is made when @code{awk} is about to open the next input file. +At that point in execution, it checks the ``file name'' to see whether +it is really a variable assignment; if so, @code{awk} sets the variable +instead of reading a file. + +Therefore, the variables actually receive the given values after all +previously specified files have been read. In particular, the values of +variables assigned in this fashion are @emph{not} available inside a +@code{BEGIN} rule +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}), +since such rules are run before @code{awk} begins scanning the argument list. + +@cindex dark corner +The variable values given on the command line are processed for escape +sequences (d.c.) (@pxref{Escape Sequences}). + +In some earlier implementations of @code{awk}, when a variable assignment +occurred before any file names, the assignment would happen @emph{before} +the @code{BEGIN} rule was executed. @code{awk}'s behavior was thus +inconsistent; some command line assignments were available inside the +@code{BEGIN} rule, while others were not. 
However, +some applications came to depend +upon this ``feature.'' When @code{awk} was changed to be more consistent, +the @samp{-v} option was added to accommodate applications that depended +upon the old behavior. + +The variable assignment feature is most useful for assigning to variables +such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and +output formats, before scanning the data files. It is also useful for +controlling state if multiple passes are needed over a data file. For +example: + +@cindex multiple passes over data +@cindex passes, multiple +@example +awk 'pass == 1 @{ @var{pass 1 stuff} @} + pass == 2 @{ @var{pass 2 stuff} @}' pass=1 mydata pass=2 mydata +@end example + +Given the variable assignment feature, the @samp{-F} option for setting +the value of @code{FS} is not +strictly necessary. It remains for historical compatibility. + +@node AWKPATH Variable, Obsolete, Other Arguments, Invoking Gawk +@section The @code{AWKPATH} Environment Variable +@cindex @code{AWKPATH} environment variable +@cindex environment variable, @code{AWKPATH} +@cindex search path +@cindex directory search +@cindex path, search +@cindex differences between @code{gawk} and @code{awk} + +The previous section described how @code{awk} program files can be named +on the command line with the @samp{-f} option. In most @code{awk} +implementations, you must supply a precise path name for each program +file, unless the file is in the current directory. + +@cindex search path, for source files +But in @code{gawk}, if the file name supplied to the @samp{-f} option +does not contain a @samp{/}, then @code{gawk} searches a list of +directories (called the @dfn{search path}), one by one, looking for a +file with the specified name. + +The search path is a string consisting of directory names +separated by colons. @code{gawk} gets its search path from the +@code{AWKPATH} environment variable. 
If that variable does not exist, +@code{gawk} uses a default path, which is +@samp{.:/usr/local/share/awk}.@footnote{Your version of @code{gawk} +may use a directory that is different than @file{/usr/local/share/awk}; it +will depend upon how @code{gawk} was built and installed. The actual +directory will be the value of @samp{$(datadir)} generated when +@code{gawk} was configured. You probably don't need to worry about this +though.} (Programs written for use by +system administrators should use an @code{AWKPATH} variable that +does not include the current directory, @file{.}.) + +The search path feature is particularly useful for building up libraries +of useful @code{awk} functions. The library files can be placed in a +standard directory that is in the default path, and then specified on +the command line with a short file name. Otherwise, the full file name +would have to be typed for each file. + +By using both the @samp{--source} and @samp{-f} options, your command line +@code{awk} programs can use facilities in @code{awk} library files. +@xref{Library Functions, , A Library of @code{awk} Functions}. + +Path searching is not done if @code{gawk} is in compatibility mode. +This is true for both @samp{--traditional} and @samp{--posix}. +@xref{Options, ,Command Line Options}. + +@strong{Note:} if you want files in the current directory to be found, +you must include the current directory in the path, either by including +@file{.} explicitly in the path, or by writing a null entry in the +path. (A null entry is indicated by starting or ending the path with a +colon, or by placing two colons next to each other (@samp{::}).) If the +current directory is not included in the path, then files cannot be +found in the current directory. This path search mechanism is identical +to the shell's. +@c someday, @cite{The Bourne Again Shell}.... 
+ +Starting with version 3.0, if @code{AWKPATH} is not defined in the +environment, @code{gawk} will place its default search path into +@code{ENVIRON["AWKPATH"]}. This makes it easy to determine +the actual search path @code{gawk} will use. + +@node Obsolete, Undocumented, AWKPATH Variable, Invoking Gawk +@section Obsolete Options and/or Features + +@cindex deprecated options +@cindex obsolete options +@cindex deprecated features +@cindex obsolete features +This section describes features and/or command line options from +previous releases of @code{gawk} that are either not available in the +current version, or that are still supported but deprecated (meaning that +they will @emph{not} be in the next release). + +@c update this section for each release! + +For version @value{VERSION}.@value{PATCHLEVEL} of @code{gawk}, there are no +command line options +or other deprecated features from the previous version of @code{gawk}. +@iftex +This section +@end iftex +@ifinfo +This node +@end ifinfo +is thus essentially a place holder, +in case some option becomes obsolete in a future version of @code{gawk}. + +@ignore +@c This is pretty old news... +The public-domain version of @code{strftime} that is distributed with +@code{gawk} changed for the 2.14 release. The @samp{%V} conversion specifier +that used to generate the date in VMS format was changed to @samp{%v}. +This is because the POSIX standard for the @code{date} utility now +specifies a @samp{%V} conversion specifier. +@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for details. +@end ignore + +@node Undocumented, Known Bugs, Obsolete, Invoking Gawk +@section Undocumented Options and Features +@cindex undocumented features +@display +@i{Use the Source, Luke!} +Obi-Wan +@end display +@sp 1 + +This section intentionally left blank. + +@c Read The Source, Luke! + +@ignore +@c If these came out in the Info file or TeX document, then they wouldn't +@c be undocumented, would they? 
+ +@code{gawk} has one undocumented option: + +@table @code +@item -W nostalgia +@itemx --nostalgia +Print the message @code{"awk: bailing out near line 1"} and dump core. +This option was inspired by the common behavior of very early versions of +Unix @code{awk}, and by a t--shirt. +@end table + +Early versions of @code{awk} used to not require any separator (either +a newline or @samp{;}) between the rules in @code{awk} programs. Thus, +it was common to see one-line programs like: + +@example +awk '@{ sum += $1 @} END @{ print sum @}' +@end example + +@code{gawk} actually supports this, but it is purposely undocumented +since it is considered bad style. The correct way to write such a program +is either + +@example +awk '@{ sum += $1 @} ; END @{ print sum @}' +@end example + +@noindent +or + +@example +awk '@{ sum += $1 @} + END @{ print sum @}' data +@end example + +@noindent +@xref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a fuller +explanation. + +@end ignore + +@node Known Bugs, , Undocumented, Invoking Gawk +@section Known Bugs in @code{gawk} +@cindex bugs, known in @code{gawk} +@cindex known bugs + +@itemize @bullet +@item +The @samp{-F} option for changing the value of @code{FS} +(@pxref{Options, ,Command Line Options}) +is not necessary given the command line variable +assignment feature; it remains only for backwards compatibility. + +@item +If your system actually has support for @file{/dev/fd} and the +associated @file{/dev/stdin}, @file{/dev/stdout}, and +@file{/dev/stderr} files, you may get different output from @code{gawk} +than you would get on a system without those files. When @code{gawk} +interprets these files internally, it synchronizes output to the +standard output with output to @file{/dev/stdout}, while on a system +with those files, the output is actually to different open files +(@pxref{Special Files, ,Special File Names in @code{gawk}}). 
+ +@item +Syntactically invalid single character programs tend to overflow +the parse stack, generating a rather unhelpful message. Such programs +are surprisingly difficult to diagnose in the completely general case, +and the effort to do so really is not worth it. +@end itemize + +@node Library Functions, Sample Programs, Invoking Gawk, Top +@chapter A Library of @code{awk} Functions + +@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!! +This chapter presents a library of useful @code{awk} functions. The +sample programs presented later +(@pxref{Sample Programs, ,Practical @code{awk} Programs}) +use these functions. +The functions are presented here in a progression from simple to complex. + +@ref{Extract Program, ,Extracting Programs from Texinfo Source Files}, +presents a program that you can use to extract the source code for +these example library functions and programs from the Texinfo source +for this @value{DOCUMENT}. +(This has already been done as part of the @code{gawk} distribution.) + +If you have written one or more useful, general purpose @code{awk} functions, +and would like to contribute them for a subsequent edition of this @value{DOCUMENT}, +please contact the author. @xref{Bugs, ,Reporting Problems and Bugs}, +for information on doing this. Don't just send code, as you will be +required to either place your code in the public domain, +publish it under the GPL (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}), +or assign the copyright in it to the Free Software Foundation. + +@menu +* Portability Notes:: What to do if you don't have @code{gawk}. +* Nextfile Function:: Two implementations of a @code{nextfile} + function. +* Assert Function:: A function for assertions in @code{awk} + programs. +* Round Function:: A function for rounding if @code{sprintf} does + not do it correctly. +* Ordinal Functions:: Functions for using characters as numbers and + vice versa. +* Join Function:: A function to join an array into a string. 
+* Mktime Function:: A function to turn a date into a timestamp. +* Gettimeofday Function:: A function to get formatted times. +* Filetrans Function:: A function for handling data file transitions. +* Getopt Function:: A function for processing command line + arguments. +* Passwd Functions:: Functions for getting user information. +* Group Functions:: Functions for getting group information. +* Library Names:: How to best name private global variables in + library functions. +@end menu + +@node Portability Notes, Nextfile Function, Library Functions, Library Functions +@section Simulating @code{gawk}-specific Features +@cindex portability issues + +The programs in this chapter and in +@ref{Sample Programs, ,Practical @code{awk} Programs}, +freely use features that are specific to @code{gawk}. +This section briefly discusses how you can rewrite these programs for +different implementations of @code{awk}. + +Diagnostic error messages are sent to @file{/dev/stderr}. +Use @samp{| "cat 1>&2"} instead of @samp{> "/dev/stderr"}, if your system +does not have a @file{/dev/stderr}, or if you cannot use @code{gawk}. + +A number of programs use @code{nextfile} +(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}), +to skip any remaining input in the input file. +@ref{Nextfile Function, ,Implementing @code{nextfile} as a Function}, +shows you how to write a function that will do the same thing. + +Finally, some of the programs choose to ignore upper-case and lower-case +distinctions in their input. They do this by assigning one to @code{IGNORECASE}. +You can achieve the same effect by adding the following rule to the +beginning of the program: + +@example +# ignore case +@{ $0 = tolower($0) @} +@end example + +@noindent +Also, verify that all regexp and string constants used in +comparisons only use lower-case letters. 
+ +@node Nextfile Function, Assert Function, Portability Notes, Library Functions +@section Implementing @code{nextfile} as a Function + +@cindex skipping input files +@cindex input files, skipping +The @code{nextfile} statement presented in +@ref{Nextfile Statement, ,The @code{nextfile} Statement}, +is a @code{gawk}-specific extension. It is not available in other +implementations of @code{awk}. This section shows two versions of a +@code{nextfile} function that you can use to simulate @code{gawk}'s +@code{nextfile} statement if you cannot use @code{gawk}. + +Here is a first attempt at writing a @code{nextfile} function. + +@example +@group +# nextfile --- skip remaining records in current file + +# this should be read in before the "main" awk program + +function nextfile() @{ _abandon_ = FILENAME; next @} + +_abandon_ == FILENAME @{ next @} +@end group +@end example + +This file should be included before the main program, because it supplies +a rule that must be executed first. This rule compares the current data +file's name (which is always in the @code{FILENAME} variable) to a private +variable named @code{_abandon_}. If the file name matches, then the action +part of the rule executes a @code{next} statement, to go on to the next +record. (The use of @samp{_} in the variable name is a convention. +It is discussed more fully in +@ref{Library Names, , Naming Library Function Global Variables}.) + +The use of the @code{next} statement effectively creates a loop that reads +all the records from the current data file. +Eventually, the end of the file is reached, and +a new data file is opened, changing the value of @code{FILENAME}. +Once this happens, the comparison of @code{_abandon_} to @code{FILENAME} +fails, and execution continues with the first rule of the ``real'' program. 
+
+The @code{nextfile} function itself simply sets the value of @code{_abandon_}
+and then executes a @code{next} statement to start the loop
+going.@footnote{Some implementations of @code{awk} do not allow you to
+execute @code{next} from within a function body. Some other work-around
+will be necessary if you use such a version.}
+@c mawk is what we're talking about.
+
+This initial version has a subtle problem. What happens if the same data
+file is listed @emph{twice} on the command line, one right after the other,
+or even with just a variable assignment between the two occurrences of
+the file name?
+
+@c @findex nextfile
+@c do it this way, since all the indices are merged
+@cindex @code{nextfile} function
+In such a case,
+this code will skip right through the file, a second time, even though
+it should stop when it gets to the end of the first occurrence.
+Here is a second version of @code{nextfile} that remedies this problem.
+
+@example
+@group
+@c file eg/lib/nextfile.awk
+# nextfile --- skip remaining records in current file
+# correctly handle successive occurrences of the same file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME @{
+    if (FNR == 1)
+        _abandon_ = ""
+    else
+        next
+@}
+@c endfile
+@end group
+@end example
+
+The @code{nextfile} function has not changed. It sets @code{_abandon_}
+equal to the current file name and then executes a @code{next} statement.
+The @code{next} statement reads the next record and increments @code{FNR},
+so @code{FNR} is guaranteed to have a value of at least two.
+However, if @code{nextfile} is called for the last record in the file,
+then @code{awk} will close the current data file and move on to the next
+one. Upon doing so, @code{FILENAME} will be set to the name of the new file,
+and @code{FNR} will be reset to one. 
If this next file is the same as +the previous one, @code{_abandon_} will still be equal to @code{FILENAME}. +However, @code{FNR} will be equal to one, telling us that this is a new +occurrence of the file, and not the one we were reading when the +@code{nextfile} function was executed. In that case, @code{_abandon_} +is reset to the empty string, so that further executions of this rule +will fail (until the next time that @code{nextfile} is called). + +If @code{FNR} is not one, then we are still in the original data file, +and the program executes a @code{next} statement to skip through it. + +An important question to ask at this point is: ``Given that the +functionality of @code{nextfile} can be provided with a library file, +why is it built into @code{gawk}?'' This is an important question. Adding +features for little reason leads to larger, slower programs that are +harder to maintain. + +The answer is that building @code{nextfile} into @code{gawk} provides +significant gains in efficiency. If the @code{nextfile} function is executed +at the beginning of a large data file, @code{awk} still has to scan the entire +file, splitting it up into records, just to skip over it. The built-in +@code{nextfile} can simply close the file immediately and proceed to the +next one, saving a lot of time. This is particularly important in +@code{awk}, since @code{awk} programs are generally I/O bound (i.e.@: +they spend most of their time doing input and output, instead of performing +computations). + +@node Assert Function, Round Function, Nextfile Function, Library Functions +@section Assertions + +@cindex assertions +@cindex @code{assert}, C version +When writing large programs, it is often useful to be able to know +that a condition or set of conditions is true. Before proceeding with a +particular computation, you make a statement about what you believe to be +the case. 
Such a statement is known as an
+``assertion.'' The C language provides an @code{<assert.h>} header file
+and corresponding @code{assert} macro that the programmer can use to make
+assertions. If an assertion fails, the @code{assert} macro arranges to
+print a diagnostic message describing the condition that should have
+been true but was not, and then it kills the program. In C, using
+@code{assert} looks like this:
+
+@example
+#include <assert.h>
+
+int myfunc(int a, double b)
+@{
+    assert(a <= 5 && b >= 17);
+    @dots{}
+@}
+@end example
+
+If the assertion failed, the program would print a message similar to
+this:
+
+@example
+prog.c:5: assertion failed: a <= 5 && b >= 17
+@end example
+
+@findex assert
+The ANSI C language makes it possible to turn the condition into a string for use
+in printing the diagnostic message. This is not possible in @code{awk}, so
+this @code{assert} function also requires a string version of the condition
+that is being tested.
+
+@example
+@c @group
+@c file eg/lib/assert.awk
+# assert --- assert that a condition is true. Otherwise exit.
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+function assert(condition, string)
+@{
+    if (! condition) @{
+        printf("%s:%d: assertion failed: %s\n",
+            FILENAME, FNR, string) > "/dev/stderr"
+        _assert_exit = 1
+        exit 1
+    @}
+@}
+
+END @{
+    if (_assert_exit)
+        exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{assert} function tests the @code{condition} parameter. If it
+is false, it prints a message to standard error, using the @code{string}
+parameter to describe the failed condition. It then sets the variable
+@code{_assert_exit} to one, and executes the @code{exit} statement.
+The @code{exit} statement jumps to the @code{END} rule. If the @code{END}
+rule finds @code{_assert_exit} to be true, then it exits immediately.
+
+The purpose of the @code{END} rule with its test is to
+keep any other @code{END} rules from running. 
When an assertion fails, the +program should exit immediately. +If no assertions fail, then @code{_assert_exit} will still be +false when the @code{END} rule is run normally, and the rest of the +program's @code{END} rules will execute. +For all of this to work correctly, @file{assert.awk} must be the +first source file read by @code{awk}. + +You would use this function in your programs this way: + +@example +function myfunc(a, b) +@{ + assert(a <= 5 && b >= 17, "a <= 5 && b >= 17") + @dots{} +@} +@end example + +@noindent +If the assertion failed, you would see a message like this: + +@example +mydata:1357: assertion failed: a <= 5 && b >= 17 +@end example + +There is a problem with this version of @code{assert}, that it may not +be possible to work around. An @code{END} rule is automatically added +to the program calling @code{assert}. Normally, if a program consists +of just a @code{BEGIN} rule, the input files and/or standard input are +not read. However, now that the program has an @code{END} rule, @code{awk} +will attempt to read the input data files, or standard input +(@pxref{Using BEGIN/END, , Startup and Cleanup Actions}), +most likely causing the program to hang, waiting for input. + +@node Round Function, Ordinal Functions, Assert Function, Library Functions +@section Rounding Numbers + +@cindex rounding +The way @code{printf} and @code{sprintf} +(@pxref{Printf, , Using @code{printf} Statements for Fancier Printing}) +do rounding will often depend +upon the system's C @code{sprintf} subroutine. +On many machines, +@code{sprintf} rounding is ``unbiased,'' which means it doesn't always +round a trailing @samp{.5} up, contrary to naive expectations. In unbiased +rounding, @samp{.5} rounds to even, rather than always up, so 1.5 rounds to +2 but 4.5 rounds to 4. +The result is that if you are using a format that does +rounding (e.g., @code{"%.0f"}) you should check what your system does. 
+The following function does traditional rounding; +it might be useful if your awk's @code{printf} does unbiased rounding. + +@findex round +@example +@c file eg/lib/round.awk +# round --- do normal rounding +# +# Arnold Robbins, arnold@@gnu.ai.mit.edu, August, 1996 +# Public Domain + +function round(x, ival, aval, fraction) +@{ + ival = int(x) # integer part, int() truncates + + # see if fractional part + if (ival == x) # no fraction + return x + + if (x < 0) @{ + aval = -x # absolute value + ival = int(aval) + fraction = aval - ival + if (fraction >= .5) + return int(x) - 1 # -2.5 --> -3 + else + return int(x) # -2.3 --> -2 + @} else @{ + fraction = x - ival + if (fraction >= .5) + return ival + 1 + else + return ival + @} +@} + +# test harness +@{ print $0, round($0) @} +@c endfile +@end example + +@node Ordinal Functions, Join Function, Round Function, Library Functions +@section Translating Between Characters and Numbers + +@cindex numeric character values +@cindex values of characters as numbers +One commercial implementation of @code{awk} supplies a built-in function, +@code{ord}, which takes a character and returns the numeric value for that +character in the machine's character set. If the string passed to +@code{ord} has more than one character, only the first one is used. + +The inverse of this function is @code{chr} (from the function of the same +name in Pascal), which takes a number and returns the corresponding character. + +Both functions can be written very nicely in @code{awk}; there is no real +reason to build them into the @code{awk} interpreter. 
+ +@findex ord +@findex chr +@example +@group +@c file eg/lib/ord.awk +# ord.awk --- do ord and chr +# +# Global identifiers: +# _ord_: numerical values indexed by characters +# _ord_init: function to initialize _ord_ +# +# Arnold Robbins +# arnold@@gnu.ai.mit.edu +# Public Domain +# 16 January, 1992 +# 20 July, 1992, revised + +BEGIN @{ _ord_init() @} +@c endfile +@end group + +@c @group +@c file eg/lib/ord.awk +function _ord_init( low, high, i, t) +@{ + low = sprintf("%c", 7) # BEL is ascii 7 + if (low == "\a") @{ # regular ascii + low = 0 + high = 127 + @} else if (sprintf("%c", 128 + 7) == "\a") @{ + # ascii, mark parity + low = 128 + high = 255 + @} else @{ # ebcdic(!) + low = 0 + high = 255 + @} + + for (i = low; i <= high; i++) @{ + t = sprintf("%c", i) + _ord_[t] = i + @} +@} +@c endfile +@c @end group +@end example + +@cindex character sets +@cindex character encodings +@cindex ASCII +@cindex EBCDIC +@cindex mark parity +Some explanation of the numbers used by @code{chr} is worthwhile. +The most prominent character set in use today is ASCII. Although an +eight-bit byte can hold 256 distinct values (from zero to 255), ASCII only +defines characters that use the values from zero to 127.@footnote{ASCII +has been extended in many countries to use the values from 128 to 255 +for country-specific characters. If your system uses these extensions, +you can simplify @code{_ord_init} to simply loop from zero to 255.} +At least one computer manufacturer that we know of +@c Pr1me, blech +uses ASCII, but with mark parity, meaning that the leftmost bit in the byte +is always one. What this means is that on those systems, characters +have numeric values from 128 to 255. +Finally, large mainframe systems use the EBCDIC character set, which +uses all 256 values. +While there are other character sets in use on some older systems, +they are not really worth worrying about. 
+ +@example +@group +@c file eg/lib/ord.awk +function ord(str, c) +@{ + # only first character is of interest + c = substr(str, 1, 1) + return _ord_[c] +@} +@c endfile +@end group + +@group +@c file eg/lib/ord.awk +function chr(c) +@{ + # force c to be numeric by adding 0 + return sprintf("%c", c + 0) +@} +@c endfile +@end group + +@c @group +@c file eg/lib/ord.awk +#### test code #### +# BEGIN \ +# @{ +# for (;;) @{ +# printf("enter a character: ") +# if (getline var <= 0) +# break +# printf("ord(%s) = %d\n", var, ord(var)) +# @} +# @} +@c endfile +@c @end group +@end example + +An obvious improvement to these functions would be to move the code for the +@code{@w{_ord_init}} function into the body of the @code{BEGIN} rule. It was +written this way initially for ease of development. + +There is a ``test program'' in a @code{BEGIN} rule, for testing the +function. It is commented out for production use. + +@node Join Function, Mktime Function, Ordinal Functions, Library Functions +@section Merging an Array Into a String + +@cindex merging strings +When doing string processing, it is often useful to be able to join +all the strings in an array into one long string. The following function, +@code{join}, accomplishes this task. It is used later in several of +the application programs +(@pxref{Sample Programs, ,Practical @code{awk} Programs}). + +Good function design is important; this function needs to be general, but it +should also have a reasonable default behavior. It is called with an array +and the beginning and ending indices of the elements in the array to be +merged. This assumes that the array indices are numeric---a reasonable +assumption since the array was likely created with @code{split} +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). 
+ +@findex join +@example +@group +@c file eg/lib/join.awk +# join.awk --- join an array into a string +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +function join(array, start, end, sep, result, i) +@{ + if (sep == "") + sep = " " + else if (sep == SUBSEP) # magic value + sep = "" + result = array[start] + for (i = start + 1; i <= end; i++) + result = result sep array[i] + return result +@} +@c endfile +@end group +@end example + +An optional additional argument is the separator to use when joining the +strings back together. If the caller supplies a non-empty value, +@code{join} uses it. If it is not supplied, it will have a null +value. In this case, @code{join} uses a single blank as a default +separator for the strings. If the value is equal to @code{SUBSEP}, +then @code{join} joins the strings with no separator between them. +@code{SUBSEP} serves as a ``magic'' value to indicate that there should +be no separation between the component strings. + +It would be nice if @code{awk} had an assignment operator for concatenation. +The lack of an explicit operator for concatenation makes string operations +more difficult than they really need to be. + +@node Mktime Function, Gettimeofday Function, Join Function, Library Functions +@section Turning Dates Into Timestamps + +The @code{systime} function built in to @code{gawk} +returns the current time of day as +a timestamp in ``seconds since the Epoch.'' This timestamp +can be converted into a printable date of almost infinitely variable +format using the built-in @code{strftime} function. +(For more information on @code{systime} and @code{strftime}, +@pxref{Time Functions, ,Functions for Dealing with Time Stamps}.) + +@cindex converting dates to timestamps +@cindex dates, converting to timestamps +@cindex timestamps, converting from dates +An interesting but difficult problem is to convert a readable representation +of a date back into a timestamp. 
The ANSI C library provides a @code{mktime} +function that does the basic job, converting a canonical representation of a +date into a timestamp. + +It would appear at first glance that @code{gawk} would have to supply a +@code{mktime} built-in function that was simply a ``hook'' to the C language +version. In fact though, @code{mktime} can be implemented entirely in +@code{awk}. + +Here is a version of @code{mktime} for @code{awk}. It takes a simple +representation of the date and time, and converts it into a timestamp. + +The code is presented here intermixed with explanatory prose. In +@ref{Extract Program, ,Extracting Programs from Texinfo Source Files}, +you will see how the Texinfo source file for this @value{DOCUMENT} +can be processed to extract the code into a single source file. + +The program begins with a descriptive comment and a @code{BEGIN} rule +that initializes a table @code{_tm_months}. This table is a two-dimensional +array that has the lengths of the months. The first index is zero for +regular years, and one for leap years. The values are the same for all the +months in both kinds of years, except for February; thus the use of multiple +assignment. 
+ +@example +@c @group +@c file eg/lib/mktime.awk +# mktime.awk --- convert a canonical date representation +# into a timestamp +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN \ +@{ + # Initialize table of month lengths + _tm_months[0,1] = _tm_months[1,1] = 31 + _tm_months[0,2] = 28; _tm_months[1,2] = 29 + _tm_months[0,3] = _tm_months[1,3] = 31 + _tm_months[0,4] = _tm_months[1,4] = 30 + _tm_months[0,5] = _tm_months[1,5] = 31 + _tm_months[0,6] = _tm_months[1,6] = 30 + _tm_months[0,7] = _tm_months[1,7] = 31 + _tm_months[0,8] = _tm_months[1,8] = 31 + _tm_months[0,9] = _tm_months[1,9] = 30 + _tm_months[0,10] = _tm_months[1,10] = 31 + _tm_months[0,11] = _tm_months[1,11] = 30 + _tm_months[0,12] = _tm_months[1,12] = 31 +@} +@c endfile +@c @end group +@end example + +The benefit of merging multiple @code{BEGIN} rules +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}) +is particularly clear when writing library files. Functions in library +files can cleanly initialize their own private data and also provide clean-up +actions in private @code{END} rules. + +The next function is a simple one that computes whether a given year is or +is not a leap year. If a year is evenly divisible by four, but not evenly +divisible by 100, or if it is evenly divisible by 400, then it is a leap +year. Thus, 1904 was a leap year, 1900 was not, but 2000 will be. +@c Change this after the year 2000 to ``2000 was'' (:-) + +@findex _tm_isleap +@example +@group +@c file eg/lib/mktime.awk +# decide if a year is a leap year +function _tm_isleap(year, ret) +@{ + ret = (year % 4 == 0 && year % 100 != 0) || + (year % 400 == 0) + + return ret +@} +@c endfile +@end group +@end example + +This function is only used a few times in this file, and its computation +could have been written @dfn{in-line} (at the point where it's used). 
+Making it a separate function made the original development easier, and also +avoids the possibility of typing errors when duplicating the code in +multiple places. + +The next function is more interesting. It does most of the work of +generating a timestamp, which is converting a date and time into some number +of seconds since the Epoch. The caller passes an array (rather +imaginatively named @code{a}) containing six +values: the year including century, the month as a number between one and 12, +the day of the month, the hour as a number between zero and 23, the minute in +the hour, and the seconds within the minute. + +The function uses several local variables to precompute the number of +seconds in an hour, seconds in a day, and seconds in a year. Often, +similar C code simply writes out the expression in-line, expecting the +compiler to do @dfn{constant folding}. E.g., most C compilers would +turn @samp{60 * 60} into @samp{3600} at compile time, instead of recomputing +it every time at run time. Precomputing these values makes the +function more efficient. + +@findex _tm_addup +@example +@c @group +@c file eg/lib/mktime.awk +# convert a date into seconds +function _tm_addup(a, total, yearsecs, daysecs, + hoursecs, i, j) +@{ + hoursecs = 60 * 60 + daysecs = 24 * hoursecs + yearsecs = 365 * daysecs + + total = (a[1] - 1970) * yearsecs + +@group + # extra day for leap years + for (i = 1970; i < a[1]; i++) + if (_tm_isleap(i)) + total += daysecs +@end group + +@group + j = _tm_isleap(a[1]) + for (i = 1; i < a[2]; i++) + total += _tm_months[j, i] * daysecs +@end group + + total += (a[3] - 1) * daysecs + total += a[4] * hoursecs + total += a[5] * 60 + total += a[6] + + return total +@} +@c endfile +@c @end group +@end example + +The function starts with a first approximation of all the seconds between +Midnight, January 1, 1970,@footnote{This is the Epoch on POSIX systems. +It may be different on other systems.} and the beginning of the current +year. 
It then goes through all those years, and for every leap year, +adds an additional day's worth of seconds. + +The variable @code{j} holds either one or zero, if the current year is or is not +a leap year. +For every month in the current year prior to the current month, it adds +the number of seconds in the month, using the appropriate entry in the +@code{_tm_months} array. + +Finally, it adds in the seconds for the number of days prior to the current +day, and the number of hours, minutes, and seconds in the current day. + +The result is a count of seconds since January 1, 1970. This value is not +yet what is needed though. The reason why is described shortly. + +The main @code{mktime} function takes a single character string argument. +This string is a representation of a date and time in a ``canonical'' +(fixed) form. This string should be +@code{"@var{year} @var{month} @var{day} @var{hour} @var{minute} @var{second}"}. + +@findex mktime +@example +@c @group +@c file eg/lib/mktime.awk +# mktime --- convert a date into seconds, +# compensate for time zone + +function mktime(str, res1, res2, a, b, i, j, t, diff) +@{ + i = split(str, a, " ") # don't rely on FS + + if (i != 6) + return -1 + + # force numeric + for (j in a) + a[j] += 0 + +@group + # validate + if (a[1] < 1970 || + a[2] < 1 || a[2] > 12 || + a[3] < 1 || a[3] > 31 || + a[4] < 0 || a[4] > 23 || + a[5] < 0 || a[5] > 59 || + a[6] < 0 || a[6] > 60 ) + return -1 +@end group + + res1 = _tm_addup(a) + t = strftime("%Y %m %d %H %M %S", res1) + + if (_tm_debug) + printf("(%s) -> (%s)\n", str, t) > "/dev/stderr" + + split(t, b, " ") + res2 = _tm_addup(b) + + diff = res1 - res2 + + if (_tm_debug) + printf("diff = %d seconds\n", diff) > "/dev/stderr" + + res1 += diff + + return res1 +@} +@c endfile +@c @end group +@end example + +The function first splits the string into an array, using spaces and tabs as +separators. If there are not six elements in the array, it returns an +error, signaled as the value @minus{}1. 
+Next, it forces each element of the array to be numeric, by adding zero to it. +The following @samp{if} statement then makes sure that each element is +within an allowable range. (This checking could be extended further, e.g., +to make sure that the day of the month is within the correct range for the +particular month supplied.) All of this is essentially preliminary set-up +and error checking. + +Recall that @code{_tm_addup} generated a value in seconds since Midnight, +January 1, 1970. This value is not directly usable as the result we want, +@emph{since the calculation does not account for the local timezone}. In other +words, the value represents the count in seconds since the Epoch, but only +for UTC (Universal Coordinated Time). If the local timezone is east or west +of UTC, then some number of hours should be either added to, or subtracted from +the resulting timestamp. + +For example, 6:23 p.m. in Atlanta, Georgia (USA), is normally five hours west +of (behind) UTC. It is only four hours behind UTC if daylight savings +time is in effect. +If you are calling @code{mktime} in Atlanta, with the argument +@code{@w{"1993 5 23 18 23 12"}}, the result from @code{_tm_addup} will be +for 6:23 p.m. UTC, which is only 2:23 p.m. in Atlanta. It is necessary to +add another four hours worth of seconds to the result. + +How can @code{mktime} determine how far away it is from UTC? This is +surprisingly easy. The returned timestamp represents the time passed to +@code{mktime} @emph{as UTC}. This timestamp can be fed back to +@code{strftime}, which will format it as a @emph{local} time; i.e.@: as +if it already had the UTC difference added in to it. This is done by +giving @code{@w{"%Y %m %d %H %M %S"}} to @code{strftime} as the format +argument. It returns the computed timestamp in the original string +format. The result represents a time that accounts for the UTC +difference. 
When the new time is converted back to a timestamp, the +difference between the two timestamps is the difference (in seconds) +between the local timezone and UTC. This difference is then added back +to the original result. An example demonstrating this is presented below. + +Finally, there is a ``main'' program for testing the function. + +@example +@c @group +@c file eg/lib/mktime.awk +BEGIN @{ + if (_tm_test) @{ + printf "Enter date as yyyy mm dd hh mm ss: " + getline _tm_test_date + + t = mktime(_tm_test_date) + r = strftime("%Y %m %d %H %M %S", t) + printf "Got back (%s)\n", r + @} +@} +@c endfile +@c @end group +@end example + +The entire program uses two variables that can be set on the command +line to control debugging output and to enable the test in the final +@code{BEGIN} rule. Here is the result of a test run. (Note that debugging +output is to standard error, and test output is to standard output.) + +@example +@c @group +$ gawk -f mktime.awk -v _tm_test=1 -v _tm_debug=1 +@print{} Enter date as yyyy mm dd hh mm ss: 1993 5 23 15 35 10 +@error{} (1993 5 23 15 35 10) -> (1993 05 23 11 35 10) +@error{} diff = 14400 seconds +@print{} Got back (1993 05 23 15 35 10) +@c @end group +@end example + +The time entered was 3:35 p.m. (15:35 on a 24-hour clock), on May 23, 1993. +The first line +of debugging output shows the resulting time as UTC---four hours ahead of +the local time zone. The second line shows that the difference is 14400 +seconds, which is four hours. (The difference is only four hours, since +daylight savings time is in effect during May.) +The final line of test output shows that the timezone compensation +algorithm works; the returned time is the same as the entered time. + +This program does not solve the general problem of turning an arbitrary date +representation into a timestamp. That problem is very involved. However, +the @code{mktime} function provides a foundation upon which to build. 
Other +software can convert month names into numeric months, and AM/PM times into +24-hour clocks, to generate the ``canonical'' format that @code{mktime} +requires. + +@node Gettimeofday Function, Filetrans Function, Mktime Function, Library Functions +@section Managing the Time of Day + +@cindex formatted timestamps +@cindex timestamps, formatted +The @code{systime} and @code{strftime} functions described in +@ref{Time Functions, ,Functions for Dealing with Time Stamps}, +provide the minimum functionality necessary for dealing with the time of day +in human readable form. While @code{strftime} is extensive, the control +formats are not necessarily easy to remember or intuitively obvious when +reading a program. + +The following function, @code{gettimeofday}, populates a user-supplied array +with pre-formatted time information. It returns a string with the current +time formatted in the same way as the @code{date} utility. + +@findex gettimeofday +@example +@c @group +@c file eg/lib/gettime.awk +# gettimeofday --- get the time of day in a usable format +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain, May 1993 +# +# Returns a string in the format of output of date(1) +# Populates the array argument time with individual values: +# time["second"] -- seconds (0 - 59) +# time["minute"] -- minutes (0 - 59) +# time["hour"] -- hours (0 - 23) +# time["althour"] -- hours (0 - 12) +# time["monthday"] -- day of month (1 - 31) +# time["month"] -- month of year (1 - 12) +# time["monthname"] -- name of the month +# time["shortmonth"] -- short name of the month +# time["year"] -- year within century (0 - 99) +# time["fullyear"] -- year with century (19xx or 20xx) +# time["weekday"] -- day of week (Sunday = 0) +# time["altweekday"] -- day of week (Monday = 0) +# time["weeknum"] -- week number, Sunday first day +# time["altweeknum"] -- week number, Monday first day +# time["dayname"] -- name of weekday +# time["shortdayname"] -- short name of weekday +# time["yearday"] -- 
day of year (0 - 365) +# time["timezone"] -- abbreviation of timezone name +# time["ampm"] -- AM or PM designation + +@group +function gettimeofday(time, ret, now, i) +@{ + # get time once, avoids unnecessary system calls + now = systime() + + # return date(1)-style output + ret = strftime("%a %b %d %H:%M:%S %Z %Y", now) + + # clear out target array + for (i in time) + delete time[i] +@end group + +@group + # fill in values, force numeric values to be + # numeric by adding 0 + time["second"] = strftime("%S", now) + 0 + time["minute"] = strftime("%M", now) + 0 + time["hour"] = strftime("%H", now) + 0 + time["althour"] = strftime("%I", now) + 0 + time["monthday"] = strftime("%d", now) + 0 + time["month"] = strftime("%m", now) + 0 + time["monthname"] = strftime("%B", now) + time["shortmonth"] = strftime("%b", now) + time["year"] = strftime("%y", now) + 0 + time["fullyear"] = strftime("%Y", now) + 0 + time["weekday"] = strftime("%w", now) + 0 + time["altweekday"] = strftime("%u", now) + 0 + time["dayname"] = strftime("%A", now) + time["shortdayname"] = strftime("%a", now) + time["yearday"] = strftime("%j", now) + 0 + time["timezone"] = strftime("%Z", now) + time["ampm"] = strftime("%p", now) + time["weeknum"] = strftime("%U", now) + 0 + time["altweeknum"] = strftime("%W", now) + 0 + + return ret +@} +@end group +@c endfile +@end example + +The string indices are easier to use and read than the various formats +required by @code{strftime}. The @code{alarm} program presented in +@ref{Alarm Program, ,An Alarm Clock Program}, +uses this function. + +@c exercise!!! +The @code{gettimeofday} function is presented above as it was written. A +more general design for this function would have allowed the user to supply +an optional timestamp value that would have been used instead of the current +time. 
+ +@node Filetrans Function, Getopt Function, Gettimeofday Function, Library Functions +@section Noting Data File Boundaries + +@cindex per file initialization and clean-up +The @code{BEGIN} and @code{END} rules are each executed exactly once, at +the beginning and end respectively of your @code{awk} program +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). +We (the @code{gawk} authors) once had a user who mistakenly thought that the +@code{BEGIN} rule was executed at the beginning of each data file and the +@code{END} rule was executed at the end of each data file. When informed +that this was not the case, the user requested that we add new special +patterns to @code{gawk}, named @code{BEGIN_FILE} and @code{END_FILE}, that +would have the desired behavior. He even supplied us the code to do so. + +However, after a little thought, I came up with the following library program. +It arranges to call two user-supplied functions, @code{beginfile} and +@code{endfile}, at the beginning and end of each data file. +Besides solving the problem in only nine(!) lines of code, it does so +@emph{portably}; this will work with any implementation of @code{awk}. + +@example +@c @group +# transfile.awk +# +# Give the user a hook for filename transitions +# +# The user must supply functions beginfile() and endfile() +# that each take the name of the file being started or +# finished, respectively. +# +# Arnold Robbins, arnold@@gnu.ai.mit.edu, January 1992 +# Public Domain + +FILENAME != _oldfilename \ +@{ + if (_oldfilename != "") + endfile(_oldfilename) + _oldfilename = FILENAME + beginfile(FILENAME) +@} + +END @{ endfile(FILENAME) @} +@c @end group +@end example + +This file must be loaded before the user's ``main'' program, so that the +rule it supplies will be executed first. + +This rule relies on @code{awk}'s @code{FILENAME} variable that +automatically changes for each new data file. 
The current file name is
+saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does
+not equal @code{_oldfilename}, then a new data file is being processed, and
+it is necessary to call @code{endfile} for the old file. Since
+@code{endfile} should only be called if a file has been processed, the
+program first checks to make sure that @code{_oldfilename} is not the null
+string. The program then assigns the current file name to
+@code{_oldfilename}, and calls @code{beginfile} for the file.
+Since, like all @code{awk} variables, @code{_oldfilename} will be
+initialized to the null string, this rule executes correctly even for the
+first data file.
+
+The program also supplies an @code{END} rule, to do the final processing for
+the last file. Since this @code{END} rule comes before any @code{END} rules
+supplied in the ``main'' program, @code{endfile} will be called first. Once
+again the value of multiple @code{BEGIN} and @code{END} rules should be clear.
+
+@findex beginfile
+@findex endfile
+This version has the same problem as the first version of @code{nextfile}
+(@pxref{Nextfile Function, ,Implementing @code{nextfile} as a Function}).
+If the same data file occurs twice in a row on the command line, then
+@code{endfile} and @code{beginfile} will not be executed at the end of the
+first pass and at the beginning of the second pass.
+This version solves the problem.
+
+@example
+@c @group
+@c file eg/lib/ftrans.awk
+# ftrans.awk --- handle data file transitions
+#
+# user supplies beginfile() and endfile() functions
+#
+# Arnold Robbins, arnold@@gnu.ai.mit.edu. November 1992
+# Public Domain
+
+FNR == 1 @{
+    if (_filename_ != "")
+        endfile(_filename_)
+    _filename_ = FILENAME
+    beginfile(FILENAME)
+@}
+
+END @{ endfile(_filename_) @}
+@c endfile
+@c @end group
+@end example
+
+In @ref{Wc Program, ,Counting Things},
+you will see how this library function can be used, and
+how it simplifies writing the main program. 
+ +@node Getopt Function, Passwd Functions, Filetrans Function, Library Functions +@section Processing Command Line Options + +@cindex @code{getopt}, C version +@cindex processing arguments +@cindex argument processing +Most utilities on POSIX compatible systems take options or ``switches'' on +the command line that can be used to change the way a program behaves. +@code{awk} is an example of such a program +(@pxref{Options, ,Command Line Options}). +Often, options take @dfn{arguments}, data that the program needs to +correctly obey the command line option. For example, @code{awk}'s +@samp{-F} option requires a string to use as the field separator. +The first occurrence on the command line of either @samp{--} or a +string that does not begin with @samp{-} ends the options. + +Most Unix systems provide a C function named @code{getopt} for processing +command line arguments. The programmer provides a string describing the one +letter options. If an option requires an argument, it is followed in the +string with a colon. @code{getopt} is also passed the +count and values of the command line arguments, and is called in a loop. +@code{getopt} processes the command line arguments for option letters. +Each time around the loop, it returns a single character representing the +next option letter that it found, or @samp{?} if it found an invalid option. +When it returns @minus{}1, there are no options left on the command line. + +When using @code{getopt}, options that do not take arguments can be +grouped together. Furthermore, options that take arguments require that the +argument be present. The argument can immediately follow the option letter, +or it can be a separate command line argument. 
+ +Given a hypothetical program that takes +three command line options, @samp{-a}, @samp{-b}, and @samp{-c}, and +@samp{-b} requires an argument, all of the following are valid ways of +invoking the program: + +@example +@c @group +prog -a -b foo -c data1 data2 data3 +prog -ac -bfoo -- data1 data2 data3 +prog -acbfoo data1 data2 data3 +@c @end group +@end example + +Notice that when the argument is grouped with its option, the rest of +the command line argument is considered to be the option's argument. +In the above example, @samp{-acbfoo} indicates that all of the +@samp{-a}, @samp{-b}, and @samp{-c} options were supplied, +and that @samp{foo} is the argument to the @samp{-b} option. + +@code{getopt} provides four external variables that the programmer can use. + +@table @code +@item optind +The index in the argument value array (@code{argv}) where the first +non-option command line argument can be found. + +@item optarg +The string value of the argument to an option. + +@item opterr +Usually @code{getopt} prints an error message when it finds an invalid +option. Setting @code{opterr} to zero disables this feature. (An +application might wish to print its own error message.) + +@item optopt +The letter representing the command line option. +While not usually documented, most versions supply this variable. +@end table + +The following C fragment shows how @code{getopt} might process command line +arguments for @code{awk}. 
+ +@example +@group +int +main(int argc, char *argv[]) +@{ + @dots{} + /* print our own message */ + opterr = 0; +@end group +@group + while ((c = getopt(argc, argv, "v:f:F:W:")) != -1) @{ + switch (c) @{ + case 'f': /* file */ + @dots{} + break; + case 'F': /* field separator */ + @dots{} + break; + case 'v': /* variable assignment */ + @dots{} + break; + case 'W': /* extension */ + @dots{} + break; + case '?': + default: + usage(); + break; + @} + @} + @dots{} +@} +@end group +@end example + +As a side point, @code{gawk} actually uses the GNU @code{getopt_long} +function to process both normal and GNU-style long options +(@pxref{Options, ,Command Line Options}). + +The abstraction provided by @code{getopt} is very useful, and would be quite +handy in @code{awk} programs as well. Here is an @code{awk} version of +@code{getopt}. This function highlights one of the greatest weaknesses in +@code{awk}, which is that it is very poor at manipulating single characters. +Repeated calls to @code{substr} are necessary for accessing individual +characters (@pxref{String Functions, ,Built-in Functions for String Manipulation}). + +The discussion walks through the code a bit at a time. + +@example +@c @group +@c file eg/lib/getopt.awk +# getopt --- do C library getopt(3) function in awk +# +# arnold@@gnu.ai.mit.edu +# Public domain +# +# Initial version: March, 1991 +# Revised: May, 1993 + +@group +# External variables: +# Optind -- index of ARGV for first non-option argument +# Optarg -- string value of argument to current option +# Opterr -- if non-zero, print our own diagnostic +# Optopt -- current option letter +@end group + +# Returns +# -1 at end of options +# ? 
for unrecognized option +# a character representing the current option + +# Private Data +# _opti index in multi-flag option, e.g., -abc +@c endfile +@c @end group +@end example + +The function starts out with some documentation: who wrote the code, +and when it was revised, followed by a list of the global variables it uses, +what the return values are and what they mean, and any global variables that +are ``private'' to this library function. Such documentation is essential +for any program, and particularly for library functions. + +@findex getopt +@example +@c @group +@c file eg/lib/getopt.awk +function getopt(argc, argv, options, optl, thisopt, i) +@{ + optl = length(options) + if (optl == 0) # no options given + return -1 + + if (argv[Optind] == "--") @{ # all done + Optind++ + _opti = 0 + return -1 + @} else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) @{ + _opti = 0 + return -1 + @} +@c endfile +@c @end group +@end example + +The function first checks that it was indeed called with a string of options +(the @code{options} parameter). If @code{options} has a zero length, +@code{getopt} immediately returns @minus{}1. + +The next thing to check for is the end of the options. A @samp{--} ends the +command line options, as does any command line argument that does not begin +with a @samp{-}. @code{Optind} is used to step through the array of command +line arguments; it retains its value across calls to @code{getopt}, since it +is a global variable. + +The regexp used, @code{@w{/^-[^: \t\n\f\r\v\b]/}}, is +perhaps a bit of overkill; it checks for a @samp{-} followed by anything +that is not whitespace and not a colon. +If the current command line argument does not match this pattern, +it is not an option, and it ends option processing. 
+ +@example +@group +@c file eg/lib/getopt.awk + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) @{ + if (Opterr) + printf("%c -- invalid option\n", + thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) @{ + Optind++ + _opti = 0 + @} else + _opti++ + return "?" + @} +@c endfile +@end group +@end example + +The @code{_opti} variable tracks the position in the current command line +argument (@code{argv[Optind]}). In the case that multiple options were +grouped together with one @samp{-} (e.g., @samp{-abx}), it is necessary +to return them to the user one at a time. + +If @code{_opti} is equal to zero, it is set to two, the index in the string +of the next character to look at (we skip the @samp{-}, which is at position +one). The variable @code{thisopt} holds the character, obtained with +@code{substr}. It is saved in @code{Optopt} for the main program to use. + +If @code{thisopt} is not in the @code{options} string, then it is an +invalid option. If @code{Opterr} is non-zero, @code{getopt} prints an error +message on the standard error that is similar to the message from the C +version of @code{getopt}. + +Since the option is invalid, it is necessary to skip it and move on to the +next option character. If @code{_opti} is greater than or equal to the +length of the current command line argument, then it is necessary to move on +to the next one, so @code{Optind} is incremented and @code{_opti} is reset +to zero. Otherwise, @code{Optind} is left alone and @code{_opti} is merely +incremented. + +In any case, since the option was invalid, @code{getopt} returns @samp{?}. +The main program can examine @code{Optopt} if it needs to know what the +invalid option letter actually was. 
+ +@example +@group +@c file eg/lib/getopt.awk + if (substr(options, i + 1, 1) == ":") @{ + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + @} else + Optarg = "" +@c endfile +@end group +@end example + +If the option requires an argument, the option letter is followed by a colon +in the @code{options} string. If there are remaining characters in the +current command line argument (@code{argv[Optind]}), then the rest of that +string is assigned to @code{Optarg}. Otherwise, the next command line +argument is used (@samp{-xFOO} vs. @samp{@w{-x FOO}}). In either case, +@code{_opti} is reset to zero, since there are no more characters left to +examine in the current command line argument. + +@example +@c @group +@c file eg/lib/getopt.awk + if (_opti == 0 || _opti >= length(argv[Optind])) @{ + Optind++ + _opti = 0 + @} else + _opti++ + return thisopt +@} +@c endfile +@c @end group +@end example + +Finally, if @code{_opti} is either zero or greater than the length of the +current command line argument, it means this element in @code{argv} is +through being processed, so @code{Optind} is incremented to point to the +next element in @code{argv}. If neither condition is true, then only +@code{_opti} is incremented, so that the next option letter can be processed +on the next call to @code{getopt}. + +@example +@c @group +@c file eg/lib/getopt.awk +BEGIN @{ + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + + # test program + if (_getopt_test) @{ + while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) + printf("c = <%c>, optarg = <%s>\n", + _go_c, Optarg) + printf("non-option arguments:\n") + for (; Optind < ARGC; Optind++) + printf("\tARGV[%d] = <%s>\n", + Optind, ARGV[Optind]) + @} +@} +@c endfile +@c @end group +@end example + +The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one. 
+@code{Opterr} is set to one, since the default behavior is for @code{getopt} +to print a diagnostic message upon seeing an invalid option. @code{Optind} +is set to one, since there's no reason to look at the program name, which is +in @code{ARGV[0]}. + +The rest of the @code{BEGIN} rule is a simple test program. Here is the +result of two sample runs of the test program. + +@example +@group +$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x +@print{} c = <a>, optarg = <> +@print{} c = <c>, optarg = <> +@print{} c = <b>, optarg = <ARG> +@print{} non-option arguments: +@print{} ARGV[3] = <bax> +@print{} ARGV[4] = <-x> +@end group + +@group +$ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc +@print{} c = <a>, optarg = <> +@error{} x -- invalid option +@print{} c = <?>, optarg = <> +@print{} non-option arguments: +@print{} ARGV[4] = <xyz> +@print{} ARGV[5] = <abc> +@end group +@end example + +The first @samp{--} terminates the arguments to @code{awk}, so that it does +not try to interpret the @samp{-a} etc. as its own options. + +Several of the sample programs presented in +@ref{Sample Programs, ,Practical @code{awk} Programs}, +use @code{getopt} to process their arguments. + +@node Passwd Functions, Group Functions, Getopt Function, Library Functions +@section Reading the User Database + +@cindex @file{/dev/user} +The @file{/dev/user} special file +(@pxref{Special Files, ,Special File Names in @code{gawk}}) +provides access to the current user's real and effective user and group id +numbers, and if available, the user's supplementary group set. +However, since these are numbers, they do not provide very useful +information to the average user. There needs to be some way to find the +user information associated with the user and group numbers. This +section presents a suite of functions for retrieving information from the +user database. @xref{Group Functions, ,Reading the Group Database}, +for a similar suite that retrieves information from the group database.
+ +@cindex @code{getpwent}, C version +@cindex user information +@cindex login information +@cindex account information +@cindex password file +The POSIX standard does not define the file where user information is +kept. Instead, it provides the @code{<pwd.h>} header file +and several C language subroutines for obtaining user information. +The primary function is @code{getpwent}, for ``get password entry.'' +The ``password'' comes from the original user database file, +@file{/etc/passwd}, which kept user information, along with the +encrypted passwords (hence the name). + +While an @code{awk} program could simply read @file{/etc/passwd} directly +(the format is well known), because of the way password +files are handled on networked systems, +this file may not contain complete information about the system's set of users. + +@cindex @code{pwcat} program +To be sure of being +able to produce a readable, complete version of the user database, it is +necessary to write a small C program that calls @code{getpwent}. +@code{getpwent} is defined to return a pointer to a @code{struct passwd}. +Each time it is called, it returns the next entry in the database. +When there are no more entries, it returns @code{NULL}, the null pointer. +When this happens, the C program should call @code{endpwent} to close the +database. +Here is @code{pwcat}, a C program that ``cats'' the password database.
+ +@findex pwcat.c +@example +@c @group +@c file eg/lib/pwcat.c +/* + * pwcat.c + * + * Generate a printable version of the password database + * + * Arnold Robbins + * arnold@@gnu.ai.mit.edu + * May 1993 + * Public Domain + */ + +#include <stdio.h> +#include <pwd.h> + +int +main(argc, argv) +int argc; +char **argv; +@{ + struct passwd *p; + + while ((p = getpwent()) != NULL) + printf("%s:%s:%d:%d:%s:%s:%s\n", + p->pw_name, p->pw_passwd, p->pw_uid, + p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell); + + endpwent(); + exit(0); +@} +@c endfile +@c @end group +@end example + +If you don't understand C, don't worry about it. +The output from @code{pwcat} is the user database, in the traditional +@file{/etc/passwd} format of colon-separated fields. The fields are: + +@table @asis +@item Login name +The user's login name. + +@item Encrypted password +The user's encrypted password. This may not be available on some systems. + +@item User-ID +The user's numeric user-id number. + +@item Group-ID +The user's numeric group-id number. + +@item Full name +The user's full name, and perhaps other information associated with the +user. + +@item Home directory +The user's login, or ``home'' directory (familiar to shell programmers as +@code{$HOME}). + +@item Login shell +The program that will be run when the user logs in. This is usually a +shell, such as Bash (the Gnu Bourne-Again shell). +@end table + +Here are a few lines representative of @code{pwcat}'s output. + +@example +@c @group +$ pwcat +@print{} root:3Ov02d5VaUPB6:0:1:Operator:/:/bin/sh +@print{} nobody:*:65534:65534::/: +@print{} daemon:*:1:1::/: +@print{} sys:*:2:2::/:/bin/csh +@print{} bin:*:3:3::/bin: +@print{} arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh +@print{} miriam:yxaay:112:10:Miriam Robbins:/home/miriam:/bin/sh +@print{} andy:abcca2:113:10:Andy Jacobs:/home/andy:/bin/sh +@dots{} +@c @end group +@end example + +With that introduction, here is a group of functions for getting user +information.
There are several functions here, corresponding to the C +functions of the same name. + +@findex _pw_init +@example +@c file eg/lib/passwdawk.in +@group +# passwd.awk --- access password file information +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN @{ + # tailor this to suit your system + _pw_awklib = "/usr/local/libexec/awk/" +@} +@end group + +@group +function _pw_init( oldfs, oldrs, olddol0, pwcat) +@{ + if (_pw_inited) + return + oldfs = FS + oldrs = RS + olddol0 = $0 + FS = ":" + RS = "\n" + pwcat = _pw_awklib "pwcat" + while ((pwcat | getline) > 0) @{ + _pw_byname[$1] = $0 + _pw_byuid[$3] = $0 + _pw_bycount[++_pw_total] = $0 + @} + close(pwcat) + _pw_count = 0 + _pw_inited = 1 + FS = oldfs + RS = oldrs + $0 = olddol0 +@} +@c endfile +@end group +@end example + +The @code{BEGIN} rule sets a private variable to the directory where +@code{pwcat} is stored. Since it is used to help out an @code{awk} library +routine, we have chosen to put it in @file{/usr/local/libexec/awk}. +You might want it to be in a different directory on your system. + +The function @code{_pw_init} keeps three copies of the user information +in three associative arrays. The arrays are indexed by user name +(@code{_pw_byname}), by user-id number (@code{_pw_byuid}), and by order of +occurrence (@code{_pw_bycount}). + +The variable @code{_pw_inited} is used for efficiency; @code{_pw_init} only +needs to be called once. + +Since this function uses @code{getline} to read information from +@code{pwcat}, it first saves the values of @code{FS}, @code{RS}, and +@code{$0}. Doing so is necessary, since these functions could be called +from anywhere within a user's program, and the user may have his or her +own values for @code{FS} and @code{RS}. +@ignore +Problem, what if FIELDWIDTHS is in use? Sigh. +@end ignore + +The main part of the function uses a loop to read database lines, split +the line into fields, and then store the line into each array as necessary. 
+When the loop is done, @code{@w{_pw_init}} cleans up by closing the pipeline, +setting @code{@w{_pw_inited}} to one, and restoring @code{FS}, @code{RS}, and +@code{$0}. The use of @code{@w{_pw_count}} will be explained below. + +@findex getpwnam +@example +@group +@c file eg/lib/passwdawk.in +function getpwnam(name) +@{ + _pw_init() + if (name in _pw_byname) + return _pw_byname[name] + return "" +@} +@c endfile +@end group +@end example + +The @code{getpwnam} function takes a user name as a string argument. If that +user is in the database, it returns the appropriate line. Otherwise it +returns the null string. + +@findex getpwuid +@example +@group +@c file eg/lib/passwdawk.in +function getpwuid(uid) +@{ + _pw_init() + if (uid in _pw_byuid) + return _pw_byuid[uid] + return "" +@} +@c endfile +@end group +@end example + +Similarly, +the @code{getpwuid} function takes a user-id number argument. If that +user number is in the database, it returns the appropriate line. Otherwise it +returns the null string. + +@findex getpwent +@example +@c @group +@c file eg/lib/passwdawk.in +function getpwent() +@{ + _pw_init() + if (_pw_count < _pw_total) + return _pw_bycount[++_pw_count] + return "" +@} +@c endfile +@c @end group +@end example + +The @code{getpwent} function simply steps through the database, one entry at +a time. It uses @code{_pw_count} to track its current position in the +@code{_pw_bycount} array. + +@findex endpwent +@example +@c @group +@c file eg/lib/passwdawk.in +function endpwent() +@{ + _pw_count = 0 +@} +@c endfile +@c @end group +@end example + +The @code{@w{endpwent}} function resets @code{@w{_pw_count}} to zero, so that +subsequent calls to @code{getpwent} will start over again. + +A conscious design decision in this suite is that each subroutine calls +@code{@w{_pw_init}} to initialize the database arrays. 
The overhead of running +a separate process to generate the user database, and the I/O to scan it, +will only be incurred if the user's main program actually calls one of these +functions. If this library file is loaded along with a user's program, but +none of the routines are ever called, then there is no extra run-time overhead. +(The alternative would be to move the body of @code{@w{_pw_init}} into a +@code{BEGIN} rule, which would always run @code{pwcat}. This simplifies the +code but runs an extra process that may never be needed.) + +In turn, calling @code{_pw_init} is not too expensive, since the +@code{_pw_inited} variable keeps the program from reading the data more than +once. If you are worried about squeezing every last cycle out of your +@code{awk} program, the check of @code{_pw_inited} could be moved out of +@code{_pw_init} and duplicated in all the other functions. In practice, +this is not necessary, since most @code{awk} programs are I/O bound, and it +would clutter up the code. + +The @code{id} program in @ref{Id Program, ,Printing Out User Information}, +uses these functions. + +@node Group Functions, Library Names, Passwd Functions, Library Functions +@section Reading the Group Database + +@cindex @code{getgrent}, C version +@cindex group information +@cindex account information +@cindex group file +Much of the discussion presented in +@ref{Passwd Functions, ,Reading the User Database}, +applies to the group database as well. Although there has traditionally +been a well known file, @file{/etc/group}, in a well known format, the POSIX +standard only provides a set of C library routines +(@code{<grp.h>} and @code{getgrent}) +for accessing the information. +Even though this file may exist, it likely does not have +complete information. Therefore, as with the user database, it is necessary +to have a small C program that generates the group database as its output.
+ +@cindex @code{grcat} program +Here is @code{grcat}, a C program that ``cats'' the group database. + +@findex grcat.c +@example +@c @group +@c file eg/lib/grcat.c +/* + * grcat.c + * + * Generate a printable version of the group database + * + * Arnold Robbins, arnold@@gnu.ai.mit.edu + * May 1993 + * Public Domain + */ + +#include <stdio.h> +#include <grp.h> + +@group +int +main(argc, argv) +int argc; +char **argv; +@{ + struct group *g; + int i; +@end group + + while ((g = getgrent()) != NULL) @{ + printf("%s:%s:%d:", g->gr_name, g->gr_passwd, + g->gr_gid); + for (i = 0; g->gr_mem[i] != NULL; i++) @{ + printf("%s", g->gr_mem[i]); + if (g->gr_mem[i+1] != NULL) + putchar(','); + @} + putchar('\n'); + @} + endgrent(); + exit(0); +@} +@c endfile +@c @end group +@end example + +Each line in the group database represents one group. The fields are +separated with colons, and represent the following information. + +@table @asis +@item Group Name +The name of the group. + +@item Group Password +The encrypted group password. In practice, this field is never used. It is +usually empty, or set to @samp{*}. + +@item Group ID Number +The numeric group-id number. This number should be unique within the file. + +@item Group Member List +A comma-separated list of user names. These users are members of the group. +Most Unix systems allow users to be members of several groups +simultaneously. If your system does, then reading @file{/dev/user} will +return those group-id numbers in @code{$5} through @code{$NF}. +(Note that @file{/dev/user} is a @code{gawk} extension; +@pxref{Special Files, ,Special File Names in @code{gawk}}.) +@end table + +Here is what running @code{grcat} might produce: + +@example +@group +$ grcat +@print{} wheel:*:0:arnold +@print{} nogroup:*:65534: +@print{} daemon:*:1: +@print{} kmem:*:2: +@print{} staff:*:10:arnold,miriam,andy +@print{} other:*:20: +@dots{} +@end group +@end example + +Here are the functions for obtaining information from the group database.
+There are several, modeled after the C library functions of the same names. + +@findex _gr_init +@example +@group +@c file eg/lib/groupawk.in +# group.awk --- functions for dealing with the group file +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN \ +@{ + # Change to suit your system + _gr_awklib = "/usr/local/libexec/awk/" +@} +@c endfile +@end group + +@group +@c file eg/lib/groupawk.in +function _gr_init( oldfs, oldrs, olddol0, grcat, n, a, i) +@{ + if (_gr_inited) + return +@end group + +@group + oldfs = FS + oldrs = RS + olddol0 = $0 + FS = ":" + RS = "\n" +@end group + +@group + grcat = _gr_awklib "grcat" + while ((grcat | getline) > 0) @{ + if ($1 in _gr_byname) + _gr_byname[$1] = _gr_byname[$1] "," $4 + else + _gr_byname[$1] = $0 + if ($3 in _gr_bygid) + _gr_bygid[$3] = _gr_bygid[$3] "," $4 + else + _gr_bygid[$3] = $0 + + n = split($4, a, "[ \t]*,[ \t]*") +@end group +@group + for (i = 1; i <= n; i++) + if (a[i] in _gr_groupsbyuser) + _gr_groupsbyuser[a[i]] = \ + _gr_groupsbyuser[a[i]] " " $1 + else + _gr_groupsbyuser[a[i]] = $1 +@end group + +@group + _gr_bycount[++_gr_count] = $0 + @} +@end group +@group + close(grcat) + _gr_count = 0 + _gr_inited++ + FS = oldfs + RS = oldrs + $0 = olddol0 +@} +@c endfile +@end group +@end example + +The @code{BEGIN} rule sets a private variable to the directory where +@code{grcat} is stored. Since it is used to help out an @code{awk} library +routine, we have chosen to put it in @file{/usr/local/libexec/awk}. You might +want it to be in a different directory on your system. + +These routines follow the same general outline as the user database routines +(@pxref{Passwd Functions, ,Reading the User Database}). +The @code{@w{_gr_inited}} variable is used to +ensure that the database is scanned no more than once. +The @code{@w{_gr_init}} function first saves @code{FS}, @code{RS}, and +@code{$0}, and then sets @code{FS} and @code{RS} to the correct values for +scanning the group information. 
+ +The group information is stored is several associative arrays. +The arrays are indexed by group name (@code{@w{_gr_byname}}), by group-id number +(@code{@w{_gr_bygid}}), and by position in the database (@code{@w{_gr_bycount}}). +There is an additional array indexed by user name (@code{@w{_gr_groupsbyuser}}), +that is a space separated list of groups that each user belongs to. + +Unlike the user database, it is possible to have multiple records in the +database for the same group. This is common when a group has a large number +of members. Such a pair of entries might look like: + +@example +tvpeople:*:101:johny,jay,arsenio +tvpeople:*:101:david,conan,tom,joan +@end example + +For this reason, @code{_gr_init} looks to see if a group name or +group-id number has already been seen. If it has, then the user names are +simply concatenated onto the previous list of users. (There is actually a +subtle problem with the code presented above. Suppose that +the first time there were no names. This code adds the names with +a leading comma. It also doesn't check that there is a @code{$4}.) + +Finally, @code{_gr_init} closes the pipeline to @code{grcat}, restores +@code{FS}, @code{RS}, and @code{$0}, initializes @code{_gr_count} to zero +(it is used later), and makes @code{_gr_inited} non-zero. + +@findex getgrnam +@example +@c @group +@c file eg/lib/groupawk.in +function getgrnam(group) +@{ + _gr_init() + if (group in _gr_byname) + return _gr_byname[group] + return "" +@} +@c endfile +@c @end group +@end example + +The @code{getgrnam} function takes a group name as its argument, and if that +group exists, it is returned. Otherwise, @code{getgrnam} returns the null +string. 
+ +@findex getgrgid +@example +@c @group +@c file eg/lib/groupawk.in +function getgrgid(gid) +@{ + _gr_init() + if (gid in _gr_bygid) + return _gr_bygid[gid] + return "" +@} +@c endfile +@c @end group +@end example + +The @code{getgrgid} function is similar; it takes a numeric group-id, and +looks up the information associated with that group-id. + +@findex getgruser +@example +@group +@c file eg/lib/groupawk.in +function getgruser(user) +@{ + _gr_init() + if (user in _gr_groupsbyuser) + return _gr_groupsbyuser[user] + return "" +@} +@c endfile +@end group +@end example + +The @code{getgruser} function does not have a C counterpart. It takes a +user name, and returns the list of groups that have the user as a member. + +@findex getgrent +@example +@c @group +@c file eg/lib/groupawk.in +function getgrent() +@{ + _gr_init() + if (++_gr_count in _gr_bycount) + return _gr_bycount[_gr_count] + return "" +@} +@c endfile +@c @end group +@end example + +The @code{getgrent} function steps through the database one entry at a time. +It uses @code{_gr_count} to track its position in the list. + +@findex endgrent +@example +@group +@c file eg/lib/groupawk.in +function endgrent() +@{ + _gr_count = 0 +@} +@c endfile +@end group +@end example + +@code{endgrent} resets @code{_gr_count} to zero so that @code{getgrent} can +start over again. + +As with the user database routines, each function calls @code{_gr_init} to +initialize the arrays. Doing so only incurs the extra overhead of running +@code{grcat} if these functions are used (as opposed to moving the body of +@code{_gr_init} into a @code{BEGIN} rule). + +Most of the work is in scanning the database and building the various +associative arrays. The functions that the user calls are themselves very +simple, relying on @code{awk}'s associative arrays to do work. + +The @code{id} program in @ref{Id Program, ,Printing Out User Information}, +uses these functions.
+ +@node Library Names, , Group Functions, Library Functions +@section Naming Library Function Global Variables + +@cindex namespace issues in @code{awk} +@cindex documenting @code{awk} programs +@cindex programs, documenting +Due to the way the @code{awk} language evolved, variables are either +@dfn{global} (usable by the entire program), or @dfn{local} (usable just by +a specific function). There is no intermediate state analogous to +@code{static} variables in C. + +Library functions often need to have global variables that they can use to +preserve state information between calls to the function. For example, +@code{getopt}'s variable @code{_opti} +(@pxref{Getopt Function, ,Processing Command Line Options}), +and the @code{_tm_months} array used by @code{mktime} +(@pxref{Mktime Function, ,Turning Dates Into Timestamps}). +Such variables are called @dfn{private}, since the only functions that need to +use them are the ones in the library. + +When writing a library function, you should try to choose names for your +private variables so that they will not conflict with any variables used by +either another library function or a user's main program. For example, a +name like @samp{i} or @samp{j} is not a good choice, since user programs +often use variable names like these for their own purposes. + +The example programs shown in this chapter all start the names of their +private variables with an underscore (@samp{_}). Users generally don't use +leading underscores in their variable names, so this convention immediately +decreases the chances that the variable name will be accidentally shared +with the user's program. + +In addition, several of the library functions use a prefix that helps +indicate what function or set of functions uses the variables. 
For example, +@code{_tm_months} in @code{mktime} +(@pxref{Mktime Function, ,Turning Dates Into Timestamps}), and +@code{_pw_byname} in the user data base routines +(@pxref{Passwd Functions, ,Reading the User Database}). +This convention is recommended, since it even further decreases the chance +of inadvertent conflict among variable names. +Note that this convention can be used equally well both for variable names +and for private function names too. + +While I could have re-written all the library routines to use this +convention, I did not do so, in order to show how my own @code{awk} +programming style has evolved, and to provide some basis for this +discussion. + +As a final note on variable naming, if a function makes global variables +available for use by a main program, it is a good convention to start that +variable's name with a capital letter. +For example, @code{getopt}'s @code{Opterr} and @code{Optind} variables +(@pxref{Getopt Function, ,Processing Command Line Options}). +The leading capital letter indicates that it is global, while the fact that +the variable name is not all capital letters indicates that the variable is +not one of @code{awk}'s built-in variables, like @code{FS}. + +It is also important that @emph{all} variables in library functions +that do not need to save state are in fact declared local. If this is +not done, the variable could accidentally be used in the user's program, +leading to bugs that are very difficult to track down. + +@example +function lib_func(x, y, l1, l2) +@{ + @dots{} + @var{use variable} some_var # some_var could be local + @dots{} # but is not by oversight +@} +@end example + +@cindex Tcl +A different convention, common in the Tcl community, is to use a single +associative array to hold the values needed by the library function(s), or +``package.'' This significantly decreases the number of actual global names +in use. 
For example, the functions described in +@ref{Passwd Functions, , Reading the User Database}, +might have used @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}}, +@code{@w{PW_data["count"]}} and @code{@w{PW_data["awklib"]}}, instead of +@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}}, +and @code{@w{_pw_count}}. + +The conventions presented in this section are exactly that, conventions. You +are not required to write your programs this way, we merely recommend that +you do so. + +@node Sample Programs, Language History, Library Functions, Top +@chapter Practical @code{awk} Programs + +This chapter presents a potpourri of @code{awk} programs for your reading +enjoyment. +@iftex +There are two sections. The first presents @code{awk} +versions of several common POSIX utilities. +The second is a grab-bag of interesting programs. +@end iftex + +Many of these programs use the library functions presented in +@ref{Library Functions, ,A Library of @code{awk} Functions}. + +@menu +* Clones:: Clones of common utilities. +* Miscellaneous Programs:: Some interesting @code{awk} programs. +@end menu + +@node Clones, Miscellaneous Programs, Sample Programs, Sample Programs +@section Re-inventing Wheels for Fun and Profit + +This section presents a number of POSIX utilities that are implemented in +@code{awk}. Re-inventing these programs in @code{awk} is often enjoyable, +since the algorithms can be very clearly expressed, and usually the code is +very concise and simple. This is true because @code{awk} does so much for you. + +It should be noted that these programs are not necessarily intended to +replace the installed versions on your system. Instead, their +purpose is to illustrate @code{awk} language programming for ``real world'' +tasks. + +The programs are presented in alphabetical order. + +@menu +* Cut Program:: The @code{cut} utility. +* Egrep Program:: The @code{egrep} utility. +* Id Program:: The @code{id} utility. 
+* Split Program:: The @code{split} utility. +* Tee Program:: The @code{tee} utility. +* Uniq Program:: The @code{uniq} utility. +* Wc Program:: The @code{wc} utility. +@end menu + +@node Cut Program, Egrep Program, Clones, Clones +@subsection Cutting Out Fields and Columns + +@cindex @code{cut} utility +The @code{cut} utility selects, or ``cuts,'' either characters or fields +from its standard +input and sends them to its standard output. @code{cut} can cut out either +a list of characters, or a list of fields. By default, fields are separated +by tabs, but you may supply a command line option to change the field +@dfn{delimiter}, i.e.@: the field separator character. @code{cut}'s definition +of fields is less general than @code{awk}'s. + +A common use of @code{cut} might be to pull out just the login name of +logged-on users from the output of @code{who}. For example, the following +pipeline generates a sorted, unique list of the logged on users: + +@example +who | cut -c1-8 | sort | uniq +@end example + +The options for @code{cut} are: + +@table @code +@item -c @var{list} +Use @var{list} as the list of characters to cut out. Items within the list +may be separated by commas, and ranges of characters can be separated with +dashes. The list @samp{1-8,15,22-35} specifies characters one through +eight, 15, and 22 through 35. + +@item -f @var{list} +Use @var{list} as the list of fields to cut out. + +@item -d @var{delim} +Use @var{delim} as the field separator character instead of the tab +character. + +@item -s +Suppress printing of lines that do not contain the field delimiter. +@end table + +The @code{awk} implementation of @code{cut} uses the @code{getopt} library +function (@pxref{Getopt Function, ,Processing Command Line Options}), +and the @code{join} library function +(@pxref{Join Function, ,Merging an Array Into a String}). + +The program begins with a comment describing the options and a @code{usage} +function which prints out a usage message and exits. 
@code{usage} is called +if invalid arguments are supplied. + +@findex cut.awk +@example +@c @group +@c file eg/prog/cut.awk +# cut.awk --- implement cut in awk +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +# Options: +# -f list Cut fields +# -d c Field delimiter character +# -c list Cut characters +# +# -s Suppress lines without the delimiter character + +function usage( e1, e2) +@{ + e1 = "usage: cut [-f list] [-d c] [-s] [files...]" + e2 = "usage: cut [-c list] [files...]" + print e1 > "/dev/stderr" + print e2 > "/dev/stderr" + exit 1 +@} +@c endfile +@c @end group +@end example + +@noindent +The variables @code{e1} and @code{e2} are used so that the function +fits nicely on the +@iftex +page. +@end iftex +@ifinfo +screen. +@end ifinfo + +Next comes a @code{BEGIN} rule that parses the command line options. +It sets @code{FS} to a single tab character, since that is @code{cut}'s +default field separator. The output field separator is also set to be the +same as the input field separator. Then @code{getopt} is used to step +through the command line options. One or the other of the variables +@code{by_fields} or @code{by_chars} is set to true, to indicate that +processing should be done by fields or by characters respectively. +When cutting by characters, the output field separator is set to the null +string. 
+ +@example +@c @group +@c file eg/prog/cut.awk +BEGIN \ +@{ + FS = "\t" # default + OFS = FS + while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{ + if (c == "f") @{ + by_fields = 1 + fieldlist = Optarg +@group + @} else if (c == "c") @{ + by_chars = 1 + fieldlist = Optarg + OFS = "" + @} else if (c == "d") @{ + if (length(Optarg) > 1) @{ + printf("Using first character of %s" \ + " for delimiter\n", Optarg) > "/dev/stderr" + Optarg = substr(Optarg, 1, 1) + @} + FS = Optarg + OFS = FS + if (FS == " ") # defeat awk semantics + FS = "[ ]" + @} else if (c == "s") + suppress++ + else + usage() + @} +@end group + + for (i = 1; i < Optind; i++) + ARGV[i] = "" +@c endfile +@c @end group +@end example + +Special care is taken when the field delimiter is a space. Using +@code{@w{" "}} (a single space) for the value of @code{FS} is +incorrect---@code{awk} would +separate fields with runs of spaces, tabs and/or newlines, and we want them to be +separated with individual spaces. Also, note that after @code{getopt} is +through, we have to clear out all the elements of @code{ARGV} from one to +@code{Optind}, so that @code{awk} will not try to process the command line +options as file names. + +After dealing with the command line options, the program verifies that the +options make sense. Only one or the other of @samp{-c} and @samp{-f} should +be used, and both require a field list. Then either @code{set_fieldlist} or +@code{set_charlist} is called to pull apart the list of fields or +characters. + +@example +@c @group +@c file eg/prog/cut.awk + if (by_fields && by_chars) + usage() + + if (by_fields == 0 && by_chars == 0) + by_fields = 1 # default + + if (fieldlist == "") @{ + print "cut: needs list for -c or -f" > "/dev/stderr" + exit 1 + @} + +@group + if (by_fields) + set_fieldlist() + else + set_charlist() +@} +@c endfile +@end group +@end example + +Here is @code{set_fieldlist}. It first splits the field list apart +at the commas, into an array. 
Then, for each element of the array, it +looks to see if it is actually a range, and if so splits it apart. The range +is verified to make sure the first number is smaller than the second. +Each number in the list is added to the @code{flist} array, which simply +lists the fields that will be printed. +Normal field splitting is used. +The program lets @code{awk} +handle the job of doing the field splitting. + +@example +@c @group +@c file eg/prog/cut.awk +function set_fieldlist( n, m, i, j, k, f, g) +@{ + n = split(fieldlist, f, ",") + j = 1 # index in flist + for (i = 1; i <= n; i++) @{ + if (index(f[i], "-") != 0) @{ # a range + m = split(f[i], g, "-") + if (m != 2 || g[1] >= g[2]) @{ + printf("bad field list: %s\n", + f[i]) > "/dev/stderr" + exit 1 + @} + for (k = g[1]; k <= g[2]; k++) + flist[j++] = k + @} else + flist[j++] = f[i] + @} + nfields = j - 1 +@} +@c endfile +@c @end group +@end example + +The @code{set_charlist} function is more complicated than @code{set_fieldlist}. +The idea here is to use @code{gawk}'s @code{FIELDWIDTHS} variable +(@pxref{Constant Size, ,Reading Fixed-width Data}), +which describes constant width input. When using a character list, that is +exactly what we have. + +Setting up @code{FIELDWIDTHS} is more complicated than simply listing the +fields that need to be printed. We have to keep track of the fields to be +printed, and also the intervening characters that have to be skipped. +For example, suppose you wanted characters one through eight, 15, and +22 through 35. You would use @samp{-c 1-8,15,22-35}. The necessary value +for @code{FIELDWIDTHS} would be @code{@w{"8 6 1 6 14"}}. This gives us five +fields, and what should be printed are @code{$1}, @code{$3}, and @code{$5}. +The intermediate fields are ``filler,'' stuff in between the desired data. + +@code{flist} lists the fields to be printed, and @code{t} tracks the +complete field list, including filler fields. 
+ +@example +@c @group +@c file eg/prog/cut.awk +function set_charlist( field, i, j, f, g, t, + filler, last, len) +@{ + field = 1 # count total fields + n = split(fieldlist, f, ",") + j = 1 # index in flist + for (i = 1; i <= n; i++) @{ + if (index(f[i], "-") != 0) @{ # range + m = split(f[i], g, "-") + if (m != 2 || g[1] >= g[2]) @{ + printf("bad character list: %s\n", + f[i]) > "/dev/stderr" + exit 1 + @} + len = g[2] - g[1] + 1 + if (g[1] > 1) # compute length of filler + filler = g[1] - last - 1 + else + filler = 0 + if (filler) + t[field++] = filler + t[field++] = len # length of field + last = g[2] + flist[j++] = field - 1 + @} else @{ + if (f[i] > 1) + filler = f[i] - last - 1 + else + filler = 0 + if (filler) + t[field++] = filler + t[field++] = 1 + last = f[i] + flist[j++] = field - 1 + @} + @} +@group + FIELDWIDTHS = join(t, 1, field - 1) + nfields = j - 1 +@} +@end group +@c endfile +@end example + +Here is the rule that actually processes the data. If the @samp{-s} option +was given, then @code{suppress} will be true. The first @code{if} statement +makes sure that the input record does have the field separator. If +@code{cut} is processing fields, @code{suppress} is true, and the field +separator character is not in the record, then the record is skipped. + +If the record is valid, then at this point, @code{gawk} has split the data +into fields, either using the character in @code{FS} or using fixed-length +fields and @code{FIELDWIDTHS}. The loop goes through the list of fields +that should be printed. If the corresponding field has data in it, it is +printed. If the next field also has data, then the separator character is +written out in between the fields. 
+ +@c 2e: Could use `index($0, FS) != 0' instead of `$0 !~ FS', below + +@example +@c @group +@c file eg/prog/cut.awk +@{ + if (by_fields && suppress && $0 !~ FS) + next + + for (i = 1; i <= nfields; i++) @{ + if ($flist[i] != "") @{ + printf "%s", $flist[i] + if (i < nfields && $flist[i+1] != "") + printf "%s", OFS + @} + @} + print "" +@} +@c endfile +@c @end group +@end example + +This version of @code{cut} relies on @code{gawk}'s @code{FIELDWIDTHS} +variable to do the character-based cutting. While it would be possible in +other @code{awk} implementations to use @code{substr} +(@pxref{String Functions, ,Built-in Functions for String Manipulation}), +it would also be extremely painful to do so. +The @code{FIELDWIDTHS} variable supplies an elegant solution to the problem +of picking the input line apart by characters. + +@node Egrep Program, Id Program, Cut Program, Clones +@subsection Searching for Regular Expressions in Files + +@cindex @code{egrep} utility +The @code{egrep} utility searches files for patterns. It uses regular +expressions that are almost identical to those available in @code{awk} +(@pxref{Regexp Constants, ,Regular Expression Constants}). It is used this way: + +@example +egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{} +@end example + +The @var{pattern} is a regexp. +In typical usage, the regexp is quoted to prevent the shell from expanding +any of the special characters as file name wildcards. +Normally, @code{egrep} prints the +lines that matched. If multiple file names are provided on the command +line, each output line is preceded by the name of the file and a colon. + +@c NEEDED +@page +The options are: + +@table @code +@item -c +Print out a count of the lines that matched the pattern, instead of the +lines themselves. + +@item -s +Be silent. No output is produced, and the exit value indicates whether +or not the pattern was matched. + +@item -v +Invert the sense of the test. 
@code{egrep} prints the lines that do +@emph{not} match the pattern, and exits successfully if the pattern was not +matched. + +@item -i +Ignore case distinctions in both the pattern and the input data. + +@item -l +Only print the names of the files that matched, not the lines that matched. + +@item -e @var{pattern} +Use @var{pattern} as the regexp to match. The purpose of the @samp{-e} +option is to allow patterns that start with a @samp{-}. +@end table + +This version uses the @code{getopt} library function +(@pxref{Getopt Function, ,Processing Command Line Options}), +and the file transition library program +(@pxref{Filetrans Function, ,Noting Data File Boundaries}). + +The program begins with a descriptive comment, and then a @code{BEGIN} rule +that processes the command line arguments with @code{getopt}. The @samp{-i} +(ignore case) option is particularly easy with @code{gawk}; we just use the +@code{IGNORECASE} built in variable +(@pxref{Built-in Variables}). + +@findex egrep.awk +@example +@c @group +@c file eg/prog/egrep.awk +# egrep.awk --- simulate egrep in awk +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +# Options: +# -c count of lines +# -s silent - use exit value +# -v invert test, success if no match +# -i ignore case +# -l print filenames only +# -e argument is pattern + +BEGIN @{ + while ((c = getopt(ARGC, ARGV, "ce:svil")) != -1) @{ + if (c == "c") + count_only++ + else if (c == "s") + no_print++ + else if (c == "v") + invert++ + else if (c == "i") + IGNORECASE = 1 + else if (c == "l") + filenames_only++ + else if (c == "e") + pattern = Optarg + else + usage() + @} +@c endfile +@c @end group +@end example + +Next comes the code that handles the @code{egrep} specific behavior. If no +pattern was supplied with @samp{-e}, the first non-option on the command +line is used. The @code{awk} command line arguments up to @code{ARGV[Optind]} +are cleared, so that @code{awk} won't try to process them as files. 
If no +files were specified, the standard input is used, and if multiple files were +specified, we make sure to note this so that the file names can precede the +matched lines in the output. + +The last two lines are commented out, since they are not needed in +@code{gawk}. They should be uncommented if you have to use another version +of @code{awk}. + +@example +@c @group +@c file eg/prog/egrep.awk + if (pattern == "") + pattern = ARGV[Optind++] + + for (i = 1; i < Optind; i++) + ARGV[i] = "" + if (Optind >= ARGC) @{ + ARGV[1] = "-" + ARGC = 2 + @} else if (ARGC - Optind > 1) + do_filenames++ + +# if (IGNORECASE) +# pattern = tolower(pattern) +@} +@c endfile +@c @end group +@end example + +The next set of lines should be uncommented if you are not using +@code{gawk}. This rule translates all the characters in the input line +into lower-case if the @samp{-i} option was specified. The rule is +commented out since it is not necessary with @code{gawk}. +@c bug: if a match happens, we output the translated line, not the original + +@example +@c @group +@c file eg/prog/egrep.awk +#@{ +# if (IGNORECASE) +# $0 = tolower($0) +#@} +@c endfile +@c @end group +@end example + +The @code{beginfile} function is called by the rule in @file{ftrans.awk} +when each new file is processed. In this case, it is very simple; all it +does is initialize a variable @code{fcount} to zero. @code{fcount} tracks +how many lines in the current file matched the pattern. + +@example +@group +@c file eg/prog/egrep.awk +function beginfile(junk) +@{ + fcount = 0 +@} +@c endfile +@end group +@end example + +The @code{endfile} function is called after each file has been processed. +It is used only when the user wants a count of the number of lines that +matched. @code{no_print} will be true only if the exit status is desired. +@code{count_only} will be true if line counts are desired. @code{egrep} +will therefore only print line counts if printing and counting are enabled. 
+The output format must be adjusted depending upon the number of files to be +processed. Finally, @code{fcount} is added to @code{total}, so that we +know how many lines altogether matched the pattern. + +@example +@c @group +@c file eg/prog/egrep.awk +function endfile(file) +@{ + if (! no_print && count_only) + if (do_filenames) + print file ":" fcount + else + print fcount + + total += fcount +@} +@c endfile +@c @end group +@end example + +This rule does most of the work of matching lines. The variable +@code{matches} will be true if the line matched the pattern. If the user +wants lines that did not match, the sense of the @code{matches} is inverted +using the @samp{!} operator. @code{fcount} is incremented with the value of +@code{matches}, which will be either one or zero, depending upon a +successful or unsuccessful match. If the line did not match, the +@code{next} statement just moves on to the next record. + +There are several optimizations for performance in the following few lines +of code. If the user only wants exit status (@code{no_print} is true), and +we don't have to count lines, then it is enough to know that one line in +this file matched, and we can skip on to the next file with @code{nextfile}. +Along similar lines, if we are only printing file names, and we +don't need to count lines, we can print the file name, and then skip to the +next file with @code{nextfile}. + +Finally, each line is printed, with a leading filename and colon if +necessary. + +@ignore +2e: note, probably better to recode the last few lines as + if (! count_only) @{ + if (no_print) + nextfile + + if (filenames_only) @{ + print FILENAME + nextfile + @} + + if (do_filenames) + print FILENAME ":" $0 + else + print + @} +@end ignore + +@example +@c @group +@c file eg/prog/egrep.awk +@{ + matches = ($0 ~ pattern) + if (invert) + matches = ! matches + + fcount += matches # 1 or 0 + +@group + if (! matches) + next +@end group + + if (no_print && ! 
count_only) + nextfile + + if (filenames_only && ! count_only) @{ + print FILENAME + nextfile + @} + + if (do_filenames && ! count_only) + print FILENAME ":" $0 + else if (! count_only) + print +@} +@c endfile +@c @end group +@end example + +@c @strong{Exercise}: rearrange the code inside @samp{if (! count_only)}. + +The @code{END} rule takes care of producing the correct exit status. If +there were no matches, the exit status is one, otherwise it is zero. + +@example +@c @group +@c file eg/prog/egrep.awk +END \ +@{ + if (total == 0) + exit 1 + exit 0 +@} +@c endfile +@c @end group +@end example + +The @code{usage} function prints a usage message in case of invalid options +and then exits. + +@example +@c @group +@c file eg/prog/egrep.awk +function usage( e) +@{ + e = "Usage: egrep [-csvil] [-e pat] [files ...]" + print e > "/dev/stderr" + exit 1 +@} +@c endfile +@c @end group +@end example + +The variable @code{e} is used so that the function fits nicely +on the printed page. + +@cindex backslash continuation +Just a note on programming style. You may have noticed that the @code{END} +rule uses backslash continuation, with the open brace on a line by +itself. This is so that it more closely resembles the way functions +are written. Many of the examples +@iftex +in this chapter +@end iftex +use this style. You can decide for yourself if you like writing +your @code{BEGIN} and @code{END} rules this way, +or not. + +@node Id Program, Split Program, Egrep Program, Clones +@subsection Printing Out User Information + +@cindex @code{id} utility +The @code{id} utility lists a user's real and effective user-id numbers, +real and effective group-id numbers, and the user's group set, if any. +@code{id} will only print the effective user-id and group-id if they are +different from the real ones. If possible, @code{id} will also supply the +corresponding user and group names. 
The output might look like this: + +@example +$ id +@print{} uid=2076(arnold) gid=10(staff) groups=10(staff),4(tty) +@end example + +This information is exactly what is provided by @code{gawk}'s +@file{/dev/user} special file (@pxref{Special Files, ,Special File Names in @code{gawk}}). +However, the @code{id} utility provides a more palatable output than just a +string of numbers. + +Here is a simple version of @code{id} written in @code{awk}. +It uses the user database library functions +(@pxref{Passwd Functions, ,Reading the User Database}), +and the group database library functions +(@pxref{Group Functions, ,Reading the Group Database}). + +The program is fairly straightforward. All the work is done in the +@code{BEGIN} rule. The user and group id numbers are obtained from +@file{/dev/user}. If there is no support for @file{/dev/user}, the program +gives up. + +The code is repetitive. The entry in the user database for the real user-id +number is split into parts at the @samp{:}. The name is the first field. +Similar code is used for the effective user-id number, and the group +numbers. 
+ +@findex id.awk +@example +@c @group +@c file eg/prog/id.awk +# id.awk --- implement id in awk +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +# output is: +# uid=12(foo) euid=34(bar) gid=3(baz) \ +# egid=5(blat) groups=9(nine),2(two),1(one) + +BEGIN \ +@{ + if ((getline < "/dev/user") < 0) @{ + err = "id: no /dev/user support - cannot run" + print err > "/dev/stderr" + exit 1 + @} + close("/dev/user") + + uid = $1 + euid = $2 + gid = $3 + egid = $4 + + printf("uid=%d", uid) + pw = getpwuid(uid) +@group + if (pw != "") @{ + split(pw, a, ":") + printf("(%s)", a[1]) + @} +@end group + + if (euid != uid) @{ + printf(" euid=%d", euid) + pw = getpwuid(euid) + if (pw != "") @{ + split(pw, a, ":") + printf("(%s)", a[1]) + @} + @} + + printf(" gid=%d", gid) + pw = getgrgid(gid) + if (pw != "") @{ + split(pw, a, ":") + printf("(%s)", a[1]) + @} + + if (egid != gid) @{ + printf(" egid=%d", egid) + pw = getgrgid(egid) + if (pw != "") @{ + split(pw, a, ":") + printf("(%s)", a[1]) + @} + @} + + if (NF > 4) @{ + printf(" groups="); + for (i = 5; i <= NF; i++) @{ + printf("%d", $i) + pw = getgrgid($i) + if (pw != "") @{ + split(pw, a, ":") + printf("(%s)", a[1]) + @} +@group + if (i < NF) + printf(",") +@end group + @} + @} + print "" +@} +@c endfile +@c @end group +@end example + +@c exercise!!! +@ignore +The POSIX version of @code{id} takes arguments that control which +information is printed. Modify this version to accept the same +arguments and perform in the same way. +@end ignore + +@node Split Program, Tee Program, Id Program, Clones +@subsection Splitting a Large File Into Pieces + +@cindex @code{split} utility +The @code{split} program splits large text files into smaller pieces. By default, +the output files are named @file{xaa}, @file{xab}, and so on. Each file has +1000 lines in it, with the likely exception of the last file. 
To change the +number of lines in each file, you supply a number on the command line +preceded with a minus, e.g., @samp{-500} for files with 500 lines in them +instead of 1000. To change the name of the output files to something like +@file{myfileaa}, @file{myfileab}, and so on, you supply an additional +argument that specifies the filename. + +Here is a version of @code{split} in @code{awk}. It uses the @code{ord} and +@code{chr} functions presented in +@ref{Ordinal Functions, ,Translating Between Characters and Numbers}. + +The program first sets its defaults, and then tests to make sure there are +not too many arguments. It then looks at each argument in turn. The +first argument could be a minus followed by a number. If it is, this happens +to look like a negative number, so it is made positive, and that is the +count of lines. The data file name is skipped over, and the final argument +is used as the prefix for the output file names. + +@findex split.awk +@example +@c @group +@c file eg/prog/split.awk +# split.awk --- do split in awk +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +# usage: split [-num] [file] [outname] + +BEGIN @{ + outfile = "x" # default + count = 1000 + if (ARGC > 4) + usage() + + i = 1 + if (ARGV[i] ~ /^-[0-9]+$/) @{ + count = -ARGV[i] + ARGV[i] = "" + i++ + @} + # test argv in case reading from stdin instead of file + if (i in ARGV) + i++ # skip data file name + if (i in ARGV) @{ + outfile = ARGV[i] + ARGV[i] = "" + @} + + s1 = s2 = "a" + out = (outfile s1 s2) +@} +@c endfile +@c @end group +@end example + +The next rule does most of the work. @code{tcount} (temporary count) tracks +how many lines have been printed to the output file so far. If it is greater +than @code{count}, it is time to close the current file and start a new one. +@code{s1} and @code{s2} track the current suffixes for the file name. If +they are both @samp{z}, the file is just too big. 
Otherwise, @code{s1} +moves to the next letter in the alphabet and @code{s2} starts over again at +@samp{a}. + +@example +@c @group +@c file eg/prog/split.awk +@{ + if (++tcount > count) @{ + close(out) + if (s2 == "z") @{ + if (s1 == "z") @{ + printf("split: %s is too large to split\n", \ + FILENAME) > "/dev/stderr" + exit 1 + @} + s1 = chr(ord(s1) + 1) + s2 = "a" + @} else + s2 = chr(ord(s2) + 1) + out = (outfile s1 s2) + tcount = 1 + @} + print > out +@} +@c endfile +@c @end group +@end example + +The @code{usage} function simply prints an error message and exits. + +@example +@c @group +@c file eg/prog/split.awk +function usage( e) +@{ + e = "usage: split [-num] [file] [outname]" + print e > "/dev/stderr" + exit 1 +@} +@c endfile +@c @end group +@end example + +@noindent +The variable @code{e} is used so that the function +fits nicely on the +@iftex +page. +@end iftex +@ifinfo +screen. +@end ifinfo + +This program is a bit sloppy; it relies on @code{awk} to close the last file +for it automatically, instead of doing it in an @code{END} rule. + +@node Tee Program, Uniq Program, Split Program, Clones +@subsection Duplicating Output Into Multiple Files + +@cindex @code{tee} utility +The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies +its standard input to its standard output, and also duplicates it to the +files named on the command line. Its usage is: + +@example +tee @r{[}-a@r{]} file @dots{} +@end example + +The @samp{-a} option tells @code{tee} to append to the named files, instead of +truncating them and starting over. + +The @code{BEGIN} rule first makes a copy of all the command line arguments, +into an array named @code{copy}. +@code{ARGV[0]} is not copied, since it is not needed. +@code{tee} cannot use @code{ARGV} directly, since @code{awk} will attempt to +process each file named in @code{ARGV} as input data. 
+ +If the first argument is @samp{-a}, then the flag variable +@code{append} is set to true, and both @code{ARGV[1]} and +@code{copy[1]} are deleted. If @code{ARGC} is less than two, then no file +names were supplied, and @code{tee} prints a usage message and exits. +Finally, @code{awk} is forced to read the standard input by setting +@code{ARGV[1]} to @code{"-"}, and @code{ARGC} to two. + +@c 2e: the `ARGC--' in the `if (ARGV[1] == "-a")' isn't needed. + +@findex tee.awk +@example +@group +@c file eg/prog/tee.awk +# tee.awk --- tee in awk +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 +# Revised December 1995 +@end group + +@group +BEGIN \ +@{ + for (i = 1; i < ARGC; i++) + copy[i] = ARGV[i] +@end group + +@group + if (ARGV[1] == "-a") @{ + append = 1 + delete ARGV[1] + delete copy[1] + ARGC-- + @} +@end group +@group + if (ARGC < 2) @{ + print "usage: tee [-a] file ..." > "/dev/stderr" + exit 1 + @} +@end group +@group + ARGV[1] = "-" + ARGC = 2 +@} +@c endfile +@end group +@end example + +The single rule does all the work. Since there is no pattern, it is +executed for each line of input. The body of the rule simply prints the +line into each file on the command line, and then to the standard output. + +@example +@group +@c file eg/prog/tee.awk +@{ + # moving the if outside the loop makes it run faster + if (append) + for (i in copy) + print >> copy[i] + else + for (i in copy) + print > copy[i] + print +@} +@c endfile +@end group +@end example + +It would have been possible to code the loop this way: + +@example +for (i in copy) + if (append) + print >> copy[i] + else + print > copy[i] +@end example + +@noindent +This is more concise, but it is also less efficient. The @samp{if} is +tested for each record and for each output file. By duplicating the loop +body, the @samp{if} is only tested once for each input record. 
If there are
+@var{N} input records and @var{M} output files, the first method only
+executes @var{N} @samp{if} statements, while the second would execute
+@var{N}@code{*}@var{M} @samp{if} statements.
+
+Finally, the @code{END} rule cleans up, by closing all the output files.
+
+@example
+@c @group
+@c file eg/prog/tee.awk
+END \
+@{
+    for (i in copy)
+        close(copy[i])
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Uniq Program, Wc Program, Tee Program, Clones
+@subsection Printing Non-duplicated Lines of Text
+
+@cindex @code{uniq} utility
+The @code{uniq} utility reads sorted lines of data on its standard input,
+and (by default) removes duplicate lines.  In other words, only unique lines
+are printed, hence the name.  @code{uniq} has a number of options.  The usage is:
+
+@example
+uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]}
+@end example
+
+The option meanings are:
+
+@table @code
+@item -d
+Only print repeated lines.
+
+@item -u
+Only print non-repeated lines.
+
+@item -c
+Count lines.  This option overrides @samp{-d} and @samp{-u}.  Both repeated
+and non-repeated lines are counted.
+
+@item -@var{n}
+Skip @var{n} fields before comparing lines.  The definition of fields
+is similar to @code{awk}'s default: non-whitespace characters separated
+by runs of spaces and/or tabs.
+
+@item +@var{n}
+Skip @var{n} characters before comparing lines.  Any fields specified with
+@samp{-@var{n}} are skipped first.
+
+@item @var{input file}
+Data is read from the input file named on the command line, instead of from
+the standard input.
+
+@item @var{output file}
+The generated output is sent to the named output file, instead of to the
+standard output.
+@end table
+
+Normally @code{uniq} behaves as if both the @samp{-d} and @samp{-u} options
+had been provided.
+
+Here is an @code{awk} implementation of @code{uniq}.
 It uses the
+@code{getopt} library function
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the @code{join} library function
+(@pxref{Join Function, ,Merging an Array Into a String}).
+
+The program begins with a @code{usage} function and then a brief outline of
+the options and their meanings in a comment.
+
+The @code{BEGIN} rule deals with the command line arguments and options.  It
+uses a trick to get @code{getopt} to handle options of the form @samp{-25},
+treating such an option as the option letter @samp{2} with an argument of
+@samp{5}.  If indeed two or more digits were supplied (@code{Optarg} looks
+like a number), @code{Optarg} is
+concatenated with the option digit, and then the result is added to zero to
+make it into a number.  If there is only one digit in the option, then
+@code{Optarg} is not needed, and @code{Optind} must be decremented so that
+@code{getopt} will process it next time.  This code is admittedly a bit
+tricky.
+
+If no options were supplied, then the default is taken, to print both
+repeated and non-repeated lines.  The output file, if provided, is assigned
+to @code{outputfile}.  Earlier, @code{outputfile} was initialized to the
+standard output, @file{/dev/stdout}.
+
+@findex uniq.awk
+@example
+@c @group
+@c file eg/prog/uniq.awk
+# uniq.awk --- do uniq in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+@group
+function usage(    e)
+@{
+    e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
+    print e > "/dev/stderr"
+    exit 1
+@}
+@end group
+
+@group
+# -c    count lines.
overrides -d and -u +# -d only repeated lines +# -u only non-repeated lines +# -n skip n fields +# +n skip n characters, skip fields first +@end group + +BEGIN \ +@{ + count = 1 + outputfile = "/dev/stdout" + opts = "udc0:1:2:3:4:5:6:7:8:9:" + while ((c = getopt(ARGC, ARGV, opts)) != -1) @{ + if (c == "u") + non_repeated_only++ + else if (c == "d") + repeated_only++ + else if (c == "c") + do_count++ + else if (index("0123456789", c) != 0) @{ + # getopt requires args to options + # this messes us up for things like -5 + if (Optarg ~ /^[0-9]+$/) + fcount = (c Optarg) + 0 + else @{ + fcount = c + 0 + Optind-- + @} + @} else + usage() + @} + + if (ARGV[Optind] ~ /^\+[0-9]+$/) @{ + charcount = substr(ARGV[Optind], 2) + 0 + Optind++ + @} + + for (i = 1; i < Optind; i++) + ARGV[i] = "" + + if (repeated_only == 0 && non_repeated_only == 0) + repeated_only = non_repeated_only = 1 + +@group + if (ARGC - Optind == 2) @{ + outputfile = ARGV[ARGC - 1] + ARGV[ARGC - 1] = "" + @} +@} +@c endfile +@end group +@end example + +The following function, @code{are_equal}, compares the current line, +@code{$0}, to the +previous line, @code{last}. It handles skipping fields and characters. + +If no field count and no character count were specified, @code{are_equal} +simply returns one or zero depending upon the result of a simple string +comparison of @code{last} and @code{$0}. Otherwise, things get more +complicated. + +If fields have to be skipped, each line is broken into an array using +@code{split} +(@pxref{String Functions, ,Built-in Functions for String Manipulation}), +and then the desired fields are joined back into a line using @code{join}. +The joined lines are stored in @code{clast} and @code{cline}. +If no fields are skipped, @code{clast} and @code{cline} are set to +@code{last} and @code{$0} respectively. + +Finally, if characters are skipped, @code{substr} is used to strip off the +leading @code{charcount} characters in @code{clast} and @code{cline}. 
The +two strings are then compared, and @code{are_equal} returns the result. + +@example +@c @group +@c file eg/prog/uniq.awk +function are_equal( n, m, clast, cline, alast, aline) +@{ + if (fcount == 0 && charcount == 0) + return (last == $0) + + if (fcount > 0) @{ + n = split(last, alast) + m = split($0, aline) + clast = join(alast, fcount+1, n) + cline = join(aline, fcount+1, m) + @} else @{ + clast = last + cline = $0 + @} + if (charcount) @{ + clast = substr(clast, charcount + 1) + cline = substr(cline, charcount + 1) + @} + + return (clast == cline) +@} +@c endfile +@c @end group +@end example + +The following two rules are the body of the program. The first one is +executed only for the very first line of data. It sets @code{last} equal to +@code{$0}, so that subsequent lines of text have something to be compared to. + +The second rule does the work. The variable @code{equal} will be one or zero +depending upon the results of @code{are_equal}'s comparison. If @code{uniq} +is counting repeated lines, then the @code{count} variable is incremented if +the lines are equal. Otherwise the line is printed and @code{count} is +reset, since the two lines are not equal. + +If @code{uniq} is not counting, @code{count} is incremented if the lines are +equal. Otherwise, if @code{uniq} is counting repeated lines, and more than +one line has been seen, or if @code{uniq} is counting non-repeated lines, +and only one line has been seen, then the line is printed, and @code{count} +is reset. + +Finally, similar logic is used in the @code{END} rule to print the final +line of input data. 
+ +@example +@c @group +@c file eg/prog/uniq.awk +@group +NR == 1 @{ + last = $0 + next +@} +@end group + +@{ + equal = are_equal() + + if (do_count) @{ # overrides -d and -u + if (equal) + count++ + else @{ + printf("%4d %s\n", count, last) > outputfile + last = $0 + count = 1 # reset + @} + next + @} + + if (equal) + count++ + else @{ + if ((repeated_only && count > 1) || + (non_repeated_only && count == 1)) + print last > outputfile + last = $0 + count = 1 + @} +@} + +@group +END @{ + if (do_count) + printf("%4d %s\n", count, last) > outputfile + else if ((repeated_only && count > 1) || + (non_repeated_only && count == 1)) + print last > outputfile +@} +@end group +@c endfile +@c @end group +@end example + +@node Wc Program, , Uniq Program, Clones +@subsection Counting Things + +@cindex @code{wc} utility +The @code{wc} (word count) utility counts lines, words, and characters in +one or more input files. Its usage is: + +@example +wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]} +@end example + +If no files are specified on the command line, @code{wc} reads its standard +input. If there are multiple files, it will also print total counts for all +the files. The options and their meanings are: + +@table @code +@item -l +Only count lines. + +@item -w +Only count words. +A ``word'' is a contiguous sequence of non-whitespace characters, separated +by spaces and/or tabs. Happily, this is the normal way @code{awk} separates +fields in its input data. + +@item -c +Only count characters. +@end table + +Implementing @code{wc} in @code{awk} is particularly elegant, since +@code{awk} does a lot of the work for us; it splits lines into words (i.e.@: +fields) and counts them, it counts lines (i.e.@: records) for us, and it can +easily tell us how long a line is. + +This version uses the @code{getopt} library function +(@pxref{Getopt Function, ,Processing Command Line Options}), +and the file transition functions +(@pxref{Filetrans Function, ,Noting Data File Boundaries}). 
+
+This version has one major difference from traditional versions of @code{wc}.
+Our version always prints the counts in the order lines, words,
+and characters.  Traditional versions note the order of the @samp{-l},
+@samp{-w}, and @samp{-c} options on the command line, and print the counts
+in that order.
+
+The @code{BEGIN} rule does the argument processing.
+The variable @code{print_total} will
+be true if more than one file was named on the command line.
+
+@findex wc.awk
+@example
+@c @group
+@c file eg/prog/wc.awk
+# wc.awk --- count lines, words, characters
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+#    -l    only count lines
+#    -w    only count words
+#    -c    only count characters
+#
+# Default is to count lines, words, characters
+
+BEGIN @{
+    # let getopt print a message about
+    # invalid options. we ignore them
+    while ((c = getopt(ARGC, ARGV, "lwc")) != -1) @{
+        if (c == "l")
+            do_lines = 1
+        else if (c == "w")
+            do_words = 1
+        else if (c == "c")
+            do_chars = 1
+    @}
+    for (i = 1; i < Optind; i++)
+        ARGV[i] = ""
+
+    # if no options, do all
+    if (! do_lines && ! do_words && ! do_chars)
+        do_lines = do_words = do_chars = 1
+
+    print_total = (ARGC - i > 1)
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{beginfile} function is simple; it just resets the counts of lines,
+words, and characters to zero, and saves the current file name in
+@code{fname}.
+
+The @code{endfile} function adds the current file's numbers to the running
+totals of lines, words, and characters.  It then prints out those numbers
+for the file that was just read.  It relies on @code{beginfile} to reset the
+numbers for the following data file. 
+ +@example +@c @group +@c file eg/prog/wc.awk +function beginfile(file) +@{ + chars = lines = words = 0 + fname = FILENAME +@} + +function endfile(file) +@{ + tchars += chars + tlines += lines + twords += words +@group + if (do_lines) + printf "\t%d", lines +@end group + if (do_words) + printf "\t%d", words + if (do_chars) + printf "\t%d", chars + printf "\t%s\n", fname +@} +@c endfile +@c @end group +@end example + +There is one rule that is executed for each line. It adds the length of the +record to @code{chars}. It has to add one, since the newline character +separating records (the value of @code{RS}) is not part of the record +itself. @code{lines} is incremented for each line read, and @code{words} is +incremented by the value of @code{NF}, the number of ``words'' on this +line.@footnote{Examine the code in +@ref{Filetrans Function, ,Noting Data File Boundaries}. +Why must @code{wc} use a separate @code{lines} variable, instead of using +the value of @code{FNR} in @code{endfile}?} + +Finally, the @code{END} rule simply prints the totals for all the files. + +@example +@c @group +@c file eg/prog/wc.awk +# do per line +@{ + chars += length($0) + 1 # get newline + lines++ + words += NF +@} + +END @{ + if (print_total) @{ + if (do_lines) + printf "\t%d", tlines + if (do_words) + printf "\t%d", twords + if (do_chars) + printf "\t%d", tchars + print "\ttotal" + @} +@} +@c endfile +@c @end group +@end example + +@node Miscellaneous Programs, , Clones, Sample Programs +@section A Grab Bag of @code{awk} Programs + +This section is a large ``grab bag'' of miscellaneous programs. +We hope you find them both interesting and enjoyable. + +@menu +* Dupword Program:: Finding duplicated words in a document. +* Alarm Program:: An alarm clock. +* Translate Program:: A program similar to the @code{tr} utility. +* Labels Program:: Printing mailing labels. +* Word Sorting:: A program to produce a word usage count. 
+* History Sorting:: Eliminating duplicate entries from a history + file. +* Extract Program:: Pulling out programs from Texinfo source + files. +* Simple Sed:: A Simple Stream Editor. +* Igawk Program:: A wrapper for @code{awk} that includes files. +@end menu + +@node Dupword Program, Alarm Program, Miscellaneous Programs, Miscellaneous Programs +@subsection Finding Duplicated Words in a Document + +A common error when writing large amounts of prose is to accidentally +duplicate words. Often you will see this in text as something like ``the +the program does the following @dots{}.'' When the text is on-line, often +the duplicated words occur at the end of one line and the beginning of +another, making them very difficult to spot. +@c as here! + +This program, @file{dupword.awk}, scans through a file one line at a time, +and looks for adjacent occurrences of the same word. It also saves the last +word on a line (in the variable @code{prev}) for comparison with the first +word on the next line. + +The first two statements make sure that the line is all lower-case, so that, +for example, +``The'' and ``the'' compare equal to each other. The second statement +removes all non-alphanumeric and non-whitespace characters from the line, so +that punctuation does not affect the comparison either. This sometimes +leads to reports of duplicated words that really are different, but this is +unusual. 
+ +@c FIXME: add check for $i != "" +@findex dupword.awk +@example +@group +@c file eg/prog/dupword.awk +# dupword --- find duplicate words in text +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# December 1991 + +@{ + $0 = tolower($0) + gsub(/[^A-Za-z0-9 \t]/, ""); + if ($1 == prev) + printf("%s:%d: duplicate %s\n", + FILENAME, FNR, $1) + for (i = 2; i <= NF; i++) + if ($i == $(i-1)) + printf("%s:%d: duplicate %s\n", + FILENAME, FNR, $i) + prev = $NF +@} +@c endfile +@end group +@end example + +@node Alarm Program, Translate Program, Dupword Program, Miscellaneous Programs +@subsection An Alarm Clock Program + +The following program is a simple ``alarm clock'' program. +You give it a time of day, and an optional message. At the given time, +it prints the message on the standard output. In addition, you can give it +the number of times to repeat the message, and also a delay between +repetitions. + +This program uses the @code{gettimeofday} function from +@ref{Gettimeofday Function, ,Managing the Time of Day}. + +All the work is done in the @code{BEGIN} rule. The first part is argument +checking and setting of defaults; the delay, the count, and the message to +print. If the user supplied a message, but it does not contain the ASCII BEL +character (known as the ``alert'' character, @samp{\a}), then it is added to +the message. (On many systems, printing the ASCII BEL generates some sort +of audible alert. Thus, when the alarm goes off, the system calls attention +to itself, in case the user is not looking at their computer or terminal.) 
+
+@findex alarm.awk
+@example
+@c @group
+@c file eg/prog/alarm.awk
+# alarm --- set an alarm
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# usage: alarm time [ "message" [ count [ delay ] ] ]
+
+BEGIN \
+@{
+    # Initial argument sanity checking
+    usage1 = "usage: alarm time ['message' [count [delay]]]"
+    usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
+
+    if (ARGC < 2) @{
+        print usage1 > "/dev/stderr"
+        print usage2 > "/dev/stderr"
+        exit 1
+    @} else if (ARGC == 5) @{
+        delay = ARGV[4] + 0
+        count = ARGV[3] + 0
+        message = ARGV[2]
+    @} else if (ARGC == 4) @{
+        count = ARGV[3] + 0
+        message = ARGV[2]
+    @} else if (ARGC == 3) @{
+        message = ARGV[2]
+    @} else if (ARGV[1] !~ /[0-9]?[0-9]:[0-9][0-9]/) @{
+        print usage1 > "/dev/stderr"
+        print usage2 > "/dev/stderr"
+        exit 1
+    @}
+
+    # set defaults for once we reach the desired time
+    if (delay == 0)
+        delay = 180    # 3 minutes
+    if (count == 0)
+        count = 5
+@group
+    if (message == "")
+        message = sprintf("\aIt is now %s!\a", ARGV[1])
+    else if (index(message, "\a") == 0)
+        message = "\a" message "\a"
+@end group
+@c endfile
+@end example
+
+The next section of code turns the alarm time into hours and minutes,
+and converts it if necessary to a 24-hour clock.  Then it turns that
+time into a count of the seconds since midnight.  Next it turns the current
+time into a count of seconds since midnight.  The difference between the two
+is how long to wait before setting off the alarm.
+
+@example
+@c @group
+@c file eg/prog/alarm.awk
+    # split up dest time
+    split(ARGV[1], atime, ":")
+    hour = atime[1] + 0    # force numeric
+    minute = atime[2] + 0    # force numeric
+
+    # get current broken down time
+    gettimeofday(now)
+
+    # if time given is 12-hour hours and it's after that
+    # hour, e.g., `alarm 5:30' at 9 a.m. 
means 5:30 p.m., + # then add 12 to real hour + if (hour < 12 && now["hour"] > hour) + hour += 12 + + # set target time in seconds since midnight + target = (hour * 60 * 60) + (minute * 60) + + # get current time in seconds since midnight + current = (now["hour"] * 60 * 60) + \ + (now["minute"] * 60) + now["second"] + + # how long to sleep for + naptime = target - current + if (naptime <= 0) @{ + print "time is in the past!" > "/dev/stderr" + exit 1 + @} +@c endfile +@c @end group +@end example + +Finally, the program uses the @code{system} function +(@pxref{I/O Functions, ,Built-in Functions for Input/Output}) +to call the @code{sleep} utility. The @code{sleep} utility simply pauses +for the given number of seconds. If the exit status is not zero, +the program assumes that @code{sleep} was interrupted, and exits. If +@code{sleep} exited with an OK status (zero), then the program prints the +message in a loop, again using @code{sleep} to delay for however many +seconds are necessary. + +@example +@c @group +@c file eg/prog/alarm.awk + # zzzzzz..... go away if interrupted + if (system(sprintf("sleep %d", naptime)) != 0) + exit 1 + + # time to notify! + command = sprintf("sleep %d", delay) + for (i = 1; i <= count; i++) @{ + print message + # if sleep command interrupted, go away + if (system(command) != 0) + break + @} + + exit 0 +@} +@c endfile +@c @end group +@end example + +@node Translate Program, Labels Program, Alarm Program, Miscellaneous Programs +@subsection Transliterating Characters + +The system @code{tr} utility transliterates characters. For example, it is +often used to map upper-case letters into lower-case, for further +processing. + +@example +@var{generate data} | tr '[A-Z]' '[a-z]' | @var{process data} @dots{} +@end example + +You give @code{tr} two lists of characters enclosed in square brackets. 
+Usually, the lists are quoted to keep the shell from attempting to do a +filename expansion.@footnote{On older, non-POSIX systems, @code{tr} often +does not require that the lists be enclosed in square brackets and quoted. +This is a feature.} When processing the input, the +first character in the first list is replaced with the first character in the +second list, the second character in the first list is replaced with the +second character in the second list, and so on. +If there are more characters in the ``from'' list than in the ``to'' list, +the last character of the ``to'' list is used for the remaining characters +in the ``from'' list. + +Some time ago, +@c early or mid-1989! +a user proposed to us that we add a transliteration function to @code{gawk}. +Being opposed to ``creeping featurism,'' I wrote the following program to +prove that character transliteration could be done with a user-level +function. This program is not as complete as the system @code{tr} utility, +but it will do most of the job. + +The @code{translate} program demonstrates one of the few weaknesses of +standard +@code{awk}: dealing with individual characters is very painful, requiring +repeated use of the @code{substr}, @code{index}, and @code{gsub} built-in +functions +(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@footnote{This +program was written before @code{gawk} acquired the ability to +split each character in a string into separate array elements. +How might this ability simplify the program?} + +There are two functions. The first, @code{stranslate}, takes three +arguments. + +@table @code +@item from +A list of characters to translate from. + +@item to +A list of characters to translate to. + +@item target +The string to do the translation on. +@end table + +Associative arrays make the translation part fairly easy. @code{t_ar} holds +the ``to'' characters, indexed by the ``from'' characters. 
Then a simple +loop goes through @code{from}, one character at a time. For each character +in @code{from}, if the character appears in @code{target}, @code{gsub} +is used to change it to the corresponding @code{to} character. + +The @code{translate} function simply calls @code{stranslate} using @code{$0} +as the target. The main program sets two global variables, @code{FROM} and +@code{TO}, from the command line, and then changes @code{ARGV} so that +@code{awk} will read from the standard input. + +Finally, the processing rule simply calls @code{translate} for each record. + +@findex translate.awk +@example +@c @group +@c file eg/prog/translate.awk +# translate --- do tr like stuff +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# August 1989 + +# bugs: does not handle things like: tr A-Z a-z, it has +# to be spelled out. However, if `to' is shorter than `from', +# the last character in `to' is used for the rest of `from'. + +function stranslate(from, to, target, lf, lt, t_ar, i, c) +@{ + lf = length(from) + lt = length(to) + for (i = 1; i <= lt; i++) + t_ar[substr(from, i, 1)] = substr(to, i, 1) + if (lt < lf) + for (; i <= lf; i++) + t_ar[substr(from, i, 1)] = substr(to, lt, 1) + for (i = 1; i <= lf; i++) @{ + c = substr(from, i, 1) + if (index(target, c) > 0) + gsub(c, t_ar[c], target) + @} + return target +@} + +@group +function translate(from, to) +@{ + return $0 = stranslate(from, to, $0) +@} +@end group + +# main program +BEGIN @{ + if (ARGC < 3) @{ + print "usage: translate from to" > "/dev/stderr" + exit + @} + FROM = ARGV[1] + TO = ARGV[2] + ARGC = 2 + ARGV[1] = "-" +@} + +@{ + translate(FROM, TO) + print +@} +@c endfile +@c @end group +@end example + +While it is possible to do character transliteration in a user-level +function, it is not necessarily efficient, and we started to consider adding +a built-in function. 
However, shortly after writing this program, we learned
+that the System V Release 4 @code{awk} had added the @code{toupper} and
+@code{tolower} functions.  These functions handle the vast majority of the
+cases where character transliteration is necessary, and so we chose to
+simply add those functions to @code{gawk} as well, and then leave well
+enough alone.
+
+An obvious improvement to this program would be to set up the
+@code{t_ar} array only once, in a @code{BEGIN} rule.  However, this
+assumes that the ``from'' and ``to'' lists
+will never change throughout the lifetime of the program.
+
+@node Labels Program, Word Sorting, Translate Program, Miscellaneous Programs
+@subsection Printing Mailing Labels
+
+Here is a ``real world''@footnote{``Real world'' is defined as
+``a program actually used to get something done.''}
+program.  This script reads lists of names and
+addresses, and generates mailing labels.  Each page of labels has 20 labels
+on it, two across and ten down.  The addresses are guaranteed to be no more
+than five lines of data.  Each address is separated from the next by a blank
+line.
+
+The basic idea is to read 20 labels worth of data.  Each line of each label
+is stored in the @code{line} array.  The single rule takes care of filling
+the @code{line} array and printing the page when 20 labels have been read.
+
+The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
+@code{awk} will split records at blank lines
+(@pxref{Records, ,How Input is Split into Records}).
+It sets @code{MAXLINES} to 100, since 100 is the maximum number
+of lines on the page (20 * 5 = 100).
+
+Most of the work is done in the @code{printpage} function.
+The label lines are stored sequentially in the @code{line} array.  But they
+have to be printed horizontally; @code{line[1]} next to @code{line[6]},
+@code{line[2]} next to @code{line[7]}, and so on.  Two loops are used to
+accomplish this. 
The outer loop, controlled by @code{i}, steps through +every 10 lines of data; this is each row of labels. The inner loop, +controlled by @code{j}, goes through the lines within the row. +As @code{j} goes from zero to four, @samp{i+j} is the @code{j}'th line in +the row, and @samp{i+j+5} is the entry next to it. The output ends up +looking something like this: + +@example +line 1 line 6 +line 2 line 7 +line 3 line 8 +line 4 line 9 +line 5 line 10 +@end example + +As a final note, at lines 21 and 61, an extra blank line is printed, to keep +the output lined up on the labels. This is dependent on the particular +brand of labels in use when the program was written. You will also note +that there are two blank lines at the top and two blank lines at the bottom. + +The @code{END} rule arranges to flush the final page of labels; there may +not have been an even multiple of 20 labels in the data. + +@findex labels.awk +@example +@c @group +@c file eg/prog/labels.awk +# labels.awk +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# June 1992 + +# Program to print labels. Each label is 5 lines of data +# that may have blank lines. The label sheets have 2 +# blank lines at the top and 2 at the bottom. 
+ +BEGIN @{ RS = "" ; MAXLINES = 100 @} + +function printpage( i, j) +@{ + if (Nlines <= 0) + return + + printf "\n\n" # header + + for (i = 1; i <= Nlines; i += 10) @{ + if (i == 21 || i == 61) + print "" + for (j = 0; j < 5; j++) @{ + if (i + j > MAXLINES) + break + printf " %-41s %s\n", line[i+j], line[i+j+5] + @} + print "" + @} + + printf "\n\n" # footer + + for (i in line) + line[i] = "" +@} + +# main rule +@{ + if (Count >= 20) @{ + printpage() + Count = 0 + Nlines = 0 + @} + n = split($0, a, "\n") + for (i = 1; i <= n; i++) + line[++Nlines] = a[i] + for (; i <= 5; i++) + line[++Nlines] = "" + Count++ +@} + +END \ +@{ + printpage() +@} +@c endfile +@c @end group +@end example + +@node Word Sorting, History Sorting, Labels Program, Miscellaneous Programs +@subsection Generating Word Usage Counts + +The following @code{awk} program prints +the number of occurrences of each word in its input. It illustrates the +associative nature of @code{awk} arrays by using strings as subscripts. It +also demonstrates the @samp{for @var{x} in @var{array}} construction. +Finally, it shows how @code{awk} can be used in conjunction with other +utility programs to do a useful task of some complexity with a minimum of +effort. Some explanations follow the program listing. + +@example +awk ' +# Print list of word frequencies +@{ + for (i = 1; i <= NF; i++) + freq[$i]++ +@} + +END @{ + for (word in freq) + printf "%s\t%d\n", word, freq[word] +@}' +@end example + +The first thing to notice about this program is that it has two rules. The +first rule, because it has an empty pattern, is executed on every line of +the input. It uses @code{awk}'s field-accessing mechanism +(@pxref{Fields, ,Examining Fields}) to pick out the individual words from +the line, and the built-in variable @code{NF} (@pxref{Built-in Variables}) +to know how many fields are available. 
+ +For each input word, an element of the array @code{freq} is incremented to +reflect that the word has been seen an additional time. + +The second rule, because it has the pattern @code{END}, is not executed +until the input has been exhausted. It prints out the contents of the +@code{freq} table that has been built up inside the first action. + +This program has several problems that would prevent it from being +useful by itself on real text files: + +@itemize @bullet +@item +Words are detected using the @code{awk} convention that fields are +separated by whitespace and that other characters in the input (except +newlines) don't have any special meaning to @code{awk}. This means that +punctuation characters count as part of words. + +@item +The @code{awk} language considers upper- and lower-case characters to be +distinct. Therefore, @samp{bartender} and @samp{Bartender} are not treated +as the same word. This is undesirable since, in normal text, words +are capitalized if they begin sentences, and a frequency analyzer should not +be sensitive to capitalization. + +@item +The output does not come out in any useful order. You're more likely to be +interested in which words occur most frequently, or having an alphabetized +table of how frequently each word occurs. +@end itemize + +The way to solve these problems is to use some of the more advanced +features of the @code{awk} language. First, we use @code{tolower} to remove +case distinctions. Next, we use @code{gsub} to remove punctuation +characters. Finally, we use the system @code{sort} utility to process the +output of the @code{awk} script. 
Here is the new version of +the program: + +@findex wordfreq.sh +@example +@c file eg/prog/wordfreq.awk +# Print list of word frequencies +@{ + $0 = tolower($0) # remove case distinctions + gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation + for (i = 1; i <= NF; i++) + freq[$i]++ +@} +@c endfile + +END @{ + for (word in freq) + printf "%s\t%d\n", word, freq[word] +@} +@end example + +Assuming we have saved this program in a file named @file{wordfreq.awk}, +and that the data is in @file{file1}, the following pipeline + +@example +awk -f wordfreq.awk file1 | sort +1 -nr +@end example + +@noindent +produces a table of the words appearing in @file{file1} in order of +decreasing frequency. + +The @code{awk} program suitably massages the data and produces a word +frequency table, which is not ordered. + +The @code{awk} script's output is then sorted by the @code{sort} utility and +printed on the terminal. The options given to @code{sort} in this example +specify to sort using the second field of each input line (skipping one field), +that the sort keys should be treated as numeric quantities (otherwise +@samp{15} would come before @samp{5}), and that the sorting should be done +in descending (reverse) order. + +We could have even done the @code{sort} from within the program, by +changing the @code{END} action to: + +@example +@c file eg/prog/wordfreq.awk +END @{ + sort = "sort +1 -nr" + for (word in freq) + printf "%s\t%d\n", word, freq[word] | sort + close(sort) +@} +@c endfile +@end example + +You would have to use this way of sorting on systems that do not +have true pipes. + +See the general operating system documentation for more information on how +to use the @code{sort} program. + +@node History Sorting, Extract Program, Word Sorting, Miscellaneous Programs +@subsection Removing Duplicates from Unsorted Text + +The @code{uniq} program +(@pxref{Uniq Program, ,Printing Non-duplicated Lines of Text}), +removes duplicate lines from @emph{sorted} data. 
+
+Suppose, however, you need to remove duplicate lines from a data file, but
+that you wish to preserve the order the lines are in?  A good example of
+this might be a shell history file.  The history file keeps a copy of all
+the commands you have entered, and it is not unusual to repeat a command
+several times in a row.  Occasionally you might wish to compact the history
+by removing duplicate entries.  Yet it is desirable to maintain the order
+of the original commands.
+
+This simple program does the job.  It uses two arrays.  The @code{data}
+array is indexed by the text of each line.
+For each line, @code{data[$0]} is incremented.
+
+If a particular line has not
+been seen before, then @code{data[$0]} will be zero.
+In that case, the text of the line is stored in @code{lines[count]}.
+Each element of @code{lines} is a unique command, and the indices of
+@code{lines} indicate the order in which those lines were encountered.
+The @code{END} rule simply prints out the lines, in order.
+
+@cindex Rakitzis, Byron
+@findex histsort.awk
+@example
+@group
+@c file eg/prog/histsort.awk
+# histsort.awk --- compact a shell history file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Thanks to Byron Rakitzis for the general idea
+@{
+    if (data[$0]++ == 0)
+        lines[++count] = $0
+@}
+
+END @{
+    for (i = 1; i <= count; i++)
+        print lines[i]
+@}
+@c endfile
+@end group
+@end example
+
+This program also provides a foundation for generating other useful
+information.  For example, using the following @code{print} statement in the
+@code{END} rule would indicate how often a particular command was used.
+
+@example
+print data[lines[i]], lines[i]
+@end example
+
+This works because @code{data[$0]} was incremented each time a line was
+seen. 
+ +@node Extract Program, Simple Sed, History Sorting, Miscellaneous Programs +@subsection Extracting Programs from Texinfo Source Files + +@iftex +Both this chapter and the previous chapter +(@ref{Library Functions, ,A Library of @code{awk} Functions}), +present a large number of @code{awk} programs. +@end iftex +@ifinfo +The nodes +@ref{Library Functions, ,A Library of @code{awk} Functions}, +and @ref{Sample Programs, ,Practical @code{awk} Programs}, +are the top level nodes for a large number of @code{awk} programs. +@end ifinfo +If you wish to experiment with these programs, it is tedious to have to type +them in by hand. Here we present a program that can extract parts of a +Texinfo input file into separate files. + +This @value{DOCUMENT} is written in Texinfo, the GNU project's document +formatting language. A single Texinfo source file can be used to produce both +printed and on-line documentation. +@iftex +Texinfo is fully documented in @cite{Texinfo---The GNU Documentation Format}, +available from the Free Software Foundation. +@end iftex +@ifinfo +The Texinfo language is described fully, starting with +@ref{Top, , Introduction, texi, Texinfo---The GNU Documentation Format}. +@end ifinfo + +For our purposes, it is enough to know three things about Texinfo input +files. + +@itemize @bullet +@item +The ``at'' symbol, @samp{@@}, is special in Texinfo, much like @samp{\} in C +or @code{awk}. Literal @samp{@@} symbols are represented in Texinfo source +files as @samp{@@@@}. + +@item +Comments start with either @samp{@@c} or @samp{@@comment}. +The file extraction program will work by using special comments that start +at the beginning of a line. + +@item +Example text that should not be split across a page boundary is bracketed +between lines containing @samp{@@group} and @samp{@@end group} commands. +@end itemize + +The following program, @file{extract.awk}, reads through a Texinfo source +file, and does two things, based on the special comments. 
+Upon seeing @samp{@w{@@c system @dots{}}}, +it runs a command, by extracting the command text from the +control line and passing it on to the @code{system} function +(@pxref{I/O Functions, ,Built-in Functions for Input/Output}). +Upon seeing @samp{@@c file @var{file}}, each subsequent line is sent to +the file @var{file}, until @samp{@@c endfile} is encountered. +The rules in @file{extract.awk} will match either @samp{@@c} or +@samp{@@comment} by letting the @samp{omment} part be optional. +Lines containing @samp{@@group} and @samp{@@end group} are simply removed. +@file{extract.awk} uses the @code{join} library function +(@pxref{Join Function, ,Merging an Array Into a String}). + +The example programs in the on-line Texinfo source for @cite{@value{TITLE}} +(@file{gawk.texi}) have all been bracketed inside @samp{file}, +and @samp{endfile} lines. The @code{gawk} distribution uses a copy of +@file{extract.awk} to extract the sample +programs and install many of them in a standard directory, where +@code{gawk} can find them. + +@file{extract.awk} begins by setting @code{IGNORECASE} to one, so that +mixed upper-case and lower-case letters in the directives won't matter. + +The first rule handles calling @code{system}, checking that a command was +given (@code{NF} is at least three), and also checking that the command +exited with a zero exit status, signifying OK. 
+ +@findex extract.awk +@example +@c @group +@c file eg/prog/extract.awk +# extract.awk --- extract files and run programs +# from texinfo files +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# May 1993 + +BEGIN @{ IGNORECASE = 1 @} + +@group +/^@@c(omment)?[ \t]+system/ \ +@{ + if (NF < 3) @{ + e = (FILENAME ":" FNR) + e = (e ": badly formed `system' line") + print e > "/dev/stderr" + next + @} + $1 = "" + $2 = "" + stat = system($0) + if (stat != 0) @{ + e = (FILENAME ":" FNR) + e = (e ": warning: system returned " stat) + print e > "/dev/stderr" + @} +@} +@end group +@c endfile +@end example + +@noindent +The variable @code{e} is used so that the function +fits nicely on the +@iftex +page. +@end iftex +@ifinfo +screen. +@end ifinfo + +The second rule handles moving data into files. It verifies that a file +name was given in the directive. If the file named is not the current file, +then the current file is closed. This means that an @samp{@@c endfile} was +not given for that file. (We should probably print a diagnostic in this +case, although at the moment we do not.) + +The @samp{for} loop does the work. It reads lines using @code{getline} +(@pxref{Getline, ,Explicit Input with @code{getline}}). +For an unexpected end of file, it calls the @code{@w{unexpected_eof}} +function. If the line is an ``endfile'' line, then it breaks out of +the loop. +If the line is an @samp{@@group} or @samp{@@end group} line, then it +ignores it, and goes on to the next line. + +Most of the work is in the following few lines. If the line has no @samp{@@} +symbols, it can be printed directly. Otherwise, each leading @samp{@@} must be +stripped off. + +To remove the @samp{@@} symbols, the line is split into separate elements of +the array @code{a}, using the @code{split} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). +Each element of @code{a} that is empty indicates two successive @samp{@@} +symbols in the original line. 
For each two empty elements (@samp{@@@@} in +the original file), we have to add back in a single @samp{@@} symbol. + +When the processing of the array is finished, @code{join} is called with the +value of @code{SUBSEP}, to rejoin the pieces back into a single +line. That line is then printed to the output file. + +@example +@c @group +@c file eg/prog/extract.awk +@group +/^@@c(omment)?[ \t]+file/ \ +@{ + if (NF != 3) @{ + e = (FILENAME ":" FNR ": badly formed `file' line") + print e > "/dev/stderr" + next + @} +@end group + if ($3 != curfile) @{ + if (curfile != "") + close(curfile) + curfile = $3 + @} + + for (;;) @{ + if ((getline line) <= 0) + unexpected_eof() + if (line ~ /^@@c(omment)?[ \t]+endfile/) + break + else if (line ~ /^@@(end[ \t]+)?group/) + continue + if (index(line, "@@") == 0) @{ + print line > curfile + continue + @} + n = split(line, a, "@@") +@group + # if a[1] == "", means leading @@, + # don't add one back in. +@end group + for (i = 2; i <= n; i++) @{ + if (a[i] == "") @{ # was an @@@@ + a[i] = "@@" + if (a[i+1] == "") + i++ + @} + @} + print join(a, 1, n, SUBSEP) > curfile + @} +@} +@c endfile +@c @end group +@end example + +An important thing to note is the use of the @samp{>} redirection. +Output done with @samp{>} only opens the file once; it stays open and +subsequent output is appended to the file +(@pxref{Redirection, , Redirecting Output of @code{print} and @code{printf}}). +This allows us to easily mix program text and explanatory prose for the same +sample source file (as has been done here!) without any hassle. The file is +only closed when a new data file name is encountered, or at the end of the +input file. + +Finally, the function @code{@w{unexpected_eof}} prints an appropriate +error message and then exits. + +The @code{END} rule handles the final cleanup, closing the open file. 
+ +@example +@c file eg/prog/extract.awk +@group +function unexpected_eof() +@{ + printf("%s:%d: unexpected EOF or error\n", \ + FILENAME, FNR) > "/dev/stderr" + exit 1 +@} +@end group + +END @{ + if (curfile) + close(curfile) +@} +@c endfile +@end example + +@node Simple Sed, Igawk Program, Extract Program, Miscellaneous Programs +@subsection A Simple Stream Editor + +@cindex @code{sed} utility +The @code{sed} utility is a ``stream editor,'' a program that reads a +stream of data, makes changes to it, and passes the modified data on. +It is often used to make global changes to a large file, or to a stream +of data generated by a pipeline of commands. + +While @code{sed} is a complicated program in its own right, its most common +use is to perform global substitutions in the middle of a pipeline: + +@example +command1 < orig.data | sed 's/old/new/g' | command2 > result +@end example + +Here, the @samp{s/old/new/g} tells @code{sed} to look for the regexp +@samp{old} on each input line, and replace it with the text @samp{new}, +globally (i.e.@: all the occurrences on a line). This is similar to +@code{awk}'s @code{gsub} function +(@pxref{String Functions, , Built-in Functions for String Manipulation}). + +The following program, @file{awksed.awk}, accepts at least two command line +arguments; the pattern to look for and the text to replace it with. Any +additional arguments are treated as data file names to process. If none +are provided, the standard input is used. 
+ +@cindex Brennan, Michael +@cindex @code{awksed} +@cindex simple stream editor +@cindex stream editor, simple +@example +@c @group +@c file eg/prog/awksed.awk +# awksed.awk --- do s/foo/bar/g using just print +# Thanks to Michael Brennan for the idea + +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# August 1995 + +@group +function usage() +@{ + print "usage: awksed pat repl [files...]" > "/dev/stderr" + exit 1 +@} +@end group + +BEGIN @{ + # validate arguments + if (ARGC < 3) + usage() + + RS = ARGV[1] + ORS = ARGV[2] + + # don't use arguments as files + ARGV[1] = ARGV[2] = "" +@} + +# look ma, no hands! +@{ + if (RT == "") + printf "%s", $0 + else + print +@} +@c endfile +@c @end group +@end example + +The program relies on @code{gawk}'s ability to have @code{RS} be a regexp +and on the setting of @code{RT} to the actual text that terminated the +record (@pxref{Records, ,How Input is Split into Records}). + +The idea is to have @code{RS} be the pattern to look for. @code{gawk} +will automatically set @code{$0} to the text between matches of the pattern. +This is text that we wish to keep, unmodified. Then, by setting @code{ORS} +to the replacement text, a simple @code{print} statement will output the +text we wish to keep, followed by the replacement text. + +There is one wrinkle to this scheme, which is what to do if the last record +doesn't end with text that matches @code{RS}? Using a @code{print} +statement unconditionally prints the replacement text, which is not correct. + +However, if the file did not end in text that matches @code{RS}, @code{RT} +will be set to the null string. In this case, we can print @code{$0} using +@code{printf} +(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}). + +The @code{BEGIN} rule handles the setup, checking for the right number +of arguments, and calling @code{usage} if there is a problem. 
Then it sets +@code{RS} and @code{ORS} from the command line arguments, and sets +@code{ARGV[1]} and @code{ARGV[2]} to the null string, so that they will +not be treated as file names +(@pxref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}). + +The @code{usage} function prints an error message and exits. + +Finally, the single rule handles the printing scheme outlined above, +using @code{print} or @code{printf} as appropriate, depending upon the +value of @code{RT}. + +@ignore +Exercise, compare the performance of this version with the more +straightforward: + +BEGIN { + pat = ARGV[1] + repl = ARGV[2] + ARGV[1] = ARGV[2] = "" +} + +{ gsub(pat, repl); print } + +Exercise: what are the advantages and disadvantages of this version vs. sed? + Advantage: egrep regexps + speed (?) + Disadvantage: no & in replacement text + +Others? +@end ignore + +@node Igawk Program, , Simple Sed, Miscellaneous Programs +@subsection An Easy Way to Use Library Functions + +Using library functions in @code{awk} can be very beneficial. It +encourages code re-use and the writing of general functions. Programs are +smaller, and therefore clearer. +However, using library functions is only easy when writing @code{awk} +programs; it is painful when running them, requiring multiple @samp{-f} +options. If @code{gawk} is unavailable, then so too is the @code{AWKPATH} +environment variable and the ability to put @code{awk} functions into a +library directory (@pxref{Options, ,Command Line Options}). + +It would be nice to be able to write programs like so: + +@example +# library functions +@@include getopt.awk +@@include join.awk +@dots{} + +# main program +BEGIN @{ + while ((c = getopt(ARGC, ARGV, "a:b:cde")) != -1) + @dots{} + @dots{} +@} +@end example + +The following program, @file{igawk.sh}, provides this service. 
+It simulates @code{gawk}'s searching of the @code{AWKPATH} variable, +and also allows @dfn{nested} includes; i.e.@: a file that has been included +with @samp{@@include} can contain further @samp{@@include} statements. +@code{igawk} will make an effort to only include files once, so that nested +includes don't accidentally include a library function twice. + +@code{igawk} should behave externally just like @code{gawk}. This means it +should accept all of @code{gawk}'s command line arguments, including the +ability to have multiple source files specified via @samp{-f}, and the +ability to mix command line and library source files. + +The program is written using the POSIX Shell (@code{sh}) command language. +The way the program works is as follows: + +@enumerate +@item +Loop through the arguments, saving anything that doesn't represent +@code{awk} source code for later, when the expanded program is run. + +@item +For any arguments that do represent @code{awk} text, put the arguments into +a temporary file that will be expanded. There are two cases. + +@enumerate a +@item +Literal text, provided with @samp{--source} or @samp{--source=}. This +text is just echoed directly. The @code{echo} program will automatically +supply a trailing newline. + +@item +File names provided with @samp{-f}. We use a neat trick, and echo +@samp{@@include @var{filename}} into the temporary file. Since the file +inclusion program will work the way @code{gawk} does, this will get the text +of the file included into the program at the correct point. +@end enumerate + +@item +Run an @code{awk} program (naturally) over the temporary file to expand +@samp{@@include} statements. The expanded program is placed in a second +temporary file. + +@item +Run the expanded program with @code{gawk} and any other original command line +arguments that the user supplied (such as the data file names). 
+@end enumerate + +The initial part of the program turns on shell tracing if the first +argument was @samp{debug}. Otherwise, a shell @code{trap} statement +arranges to clean up any temporary files on program exit or upon an +interrupt. + +@c 2e: For the temp file handling, go with Darrel's ig=${TMP:-/tmp}/igs.$$ +@c 2e: or something as similar as possible. + +The next part loops through all the command line arguments. +There are several cases of interest. + +@table @code +@item -- +This ends the arguments to @code{igawk}. Anything else should be passed on +to the user's @code{awk} program without being evaluated. + +@item -W +This indicates that the next option is specific to @code{gawk}. To make +argument processing easier, the @samp{-W} is appended to the front of the +remaining arguments and the loop continues. (This is an @code{sh} +programming trick. Don't worry about it if you are not familiar with +@code{sh}.) + +@item -v +@itemx -F +These are saved and passed on to @code{gawk}. + +@item -f +@itemx --file +@itemx --file= +@itemx -Wfile= +The file name is saved to the temporary file @file{/tmp/ig.s.$$} with an +@samp{@@include} statement. +The @code{sed} utility is used to remove the leading option part of the +argument (e.g., @samp{--file=}). + +@item --source +@itemx --source= +@itemx -Wsource= +The source text is echoed into @file{/tmp/ig.s.$$}. + +@item --version +@itemx --version +@itemx -Wversion +@code{igawk} prints its version number, and runs @samp{gawk --version} +to get the @code{gawk} version information, and then exits. +@end table + +If none of @samp{-f}, @samp{--file}, @samp{-Wfile}, @samp{--source}, +or @samp{-Wsource}, were supplied, then the first non-option argument +should be the @code{awk} program. If there are no command line +arguments left, @code{igawk} prints an error message and exits. +Otherwise, the first argument is echoed into @file{/tmp/ig.s.$$}. 
+ +In any case, after the arguments have been processed, +@file{/tmp/ig.s.$$} contains the complete text of the original @code{awk} +program. + +The @samp{$$} in @code{sh} represents the current process ID number. +It is often used in shell programs to generate unique temporary file +names. This allows multiple users to run @code{igawk} without worrying +that the temporary file names will clash. + +@cindex @code{sed} utility +Here's the program: + +@findex igawk.sh +@example +@c @group +@c file eg/prog/igawk.sh +#! /bin/sh + +# igawk --- like gawk but do @@include processing +# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain +# July 1993 + +if [ "$1" = debug ] +then + set -x + shift +else + # cleanup on exit, hangup, interrupt, quit, termination + trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15 +fi + +while [ $# -ne 0 ] # loop over arguments +do + case $1 in + --) shift; break;; + + -W) shift + set -- -W"$@@" + continue;; + + -[vF]) opts="$opts $1 '$2'" + shift;; + + -[vF]*) opts="$opts '$1'" ;; + + -f) echo @@include "$2" >> /tmp/ig.s.$$ + shift;; + +@group + -f*) f=`echo "$1" | sed 's/-f//'` + echo @@include "$f" >> /tmp/ig.s.$$ ;; +@end group + + -?file=*) # -Wfile or --file + f=`echo "$1" | sed 's/-.file=//'` + echo @@include "$f" >> /tmp/ig.s.$$ ;; + + -?file) # get arg, $2 + echo @@include "$2" >> /tmp/ig.s.$$ + shift;; + + -?source=*) # -Wsource or --source + t=`echo "$1" | sed 's/-.source=//'` + echo "$t" >> /tmp/ig.s.$$ ;; + + -?source) # get arg, $2 + echo "$2" >> /tmp/ig.s.$$ + shift;; + + -?version) + echo igawk: version 1.0 1>&2 + gawk --version + exit 0 ;; + + -[W-]*) opts="$opts '$1'" ;; + + *) break;; + esac + shift +done + +if [ ! -s /tmp/ig.s.$$ ] +then + if [ -z "$1" ] + then + echo igawk: no program! 
1>&2 + exit 1 + else + echo "$1" > /tmp/ig.s.$$ + shift + fi +fi + +# at this point, /tmp/ig.s.$$ has the program +@c endfile +@c @end group +@end example + +The @code{awk} program to process @samp{@@include} directives reads through +the program, one line at a time using @code{getline} +(@pxref{Getline, ,Explicit Input with @code{getline}}). +The input file names and @samp{@@include} statements are managed using a +stack. As each @samp{@@include} is encountered, the current file name is +``pushed'' onto the stack, and the file named in the @samp{@@include} +directive becomes +the current file name. As each file is finished, the stack is ``popped,'' +and the previous input file becomes the current input file again. +The process is started by making the original file the first one on the +stack. + +The @code{pathto} function does the work of finding the full path to a +file. It simulates @code{gawk}'s behavior when searching the @code{AWKPATH} +environment variable +(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}). +If a file name has a @samp{/} in it, no path search +is done. Otherwise, the file name is concatenated with the name of each +directory in the path, and an attempt is made to open the generated file +name. The only way in @code{awk} to test if a file can be read is to go +ahead and try to read it with @code{getline}; that is what @code{pathto} +does.@footnote{On some very old versions of @code{awk}, the test +@samp{getline junk < t} can loop forever if the file exists but is empty. +Caveat Emptor.} +If the file can be read, it is closed, and the file name is +returned. +@ignore +An alternative way to test for the file's existence would be to call +@samp{system("test -r " t)}, which uses the @code{test} utility to +see if the file exists and is readable. The disadvantage to this method +is that it requires creating an extra process, and can thus be slightly +slower. 
+@end ignore + +@example +@c @group +@c file eg/prog/igawk.sh +gawk -- ' +# process @@include directives + +function pathto(file, i, t, junk) +@{ + if (index(file, "/") != 0) + return file + + for (i = 1; i <= ndirs; i++) @{ + t = (pathlist[i] "/" file) + if ((getline junk < t) > 0) @{ + # found it + close(t) + return t + @} + @} + return "" +@} +@c endfile +@c @end group +@end example + +The main program is contained inside one @code{BEGIN} rule. The first thing it +does is set up the @code{pathlist} array that @code{pathto} uses. After +splitting the path on @samp{:}, null elements are replaced with @code{"."}, +which represents the current directory. + +@example +@group +@c file eg/prog/igawk.sh +BEGIN @{ + path = ENVIRON["AWKPATH"] + ndirs = split(path, pathlist, ":") + for (i = 1; i <= ndirs; i++) @{ + if (pathlist[i] == "") + pathlist[i] = "." + @} +@c endfile +@end group +@end example + +The stack is initialized with @code{ARGV[1]}, which will be @file{/tmp/ig.s.$$}. +The main loop comes next. Input lines are read in succession. Lines that +do not start with @samp{@@include} are printed verbatim. + +If the line does start with @samp{@@include}, the file name is in @code{$2}. +@code{pathto} is called to generate the full path. If it could not, then we +print an error message and continue. + +The next thing to check is if the file has been included already. The +@code{processed} array is indexed by the full file name of each included +file, and it tracks this information for us. If the file has been +seen, a warning message is printed. Otherwise, the new file name is +pushed onto the stack and processing continues. + +Finally, when @code{getline} encounters the end of the input file, the file +is closed and the stack is popped. When @code{stackptr} is less than zero, +the program is done. 
+ +@example +@c @group +@c file eg/prog/igawk.sh + stackptr = 0 + input[stackptr] = ARGV[1] # ARGV[1] is first file + + for (; stackptr >= 0; stackptr--) @{ + while ((getline < input[stackptr]) > 0) @{ + if (tolower($1) != "@@include") @{ + print + continue + @} + fpath = pathto($2) + if (fpath == "") @{ + printf("igawk:%s:%d: cannot find %s\n", \ + input[stackptr], FNR, $2) > "/dev/stderr" + continue + @} +@group + if (! (fpath in processed)) @{ + processed[fpath] = input[stackptr] + input[++stackptr] = fpath + @} else + print $2, "included in", input[stackptr], \ + "already included in", \ + processed[fpath] > "/dev/stderr" + @} +@end group +@group + close(input[stackptr]) + @} +@}' /tmp/ig.s.$$ > /tmp/ig.e.$$ +@end group +@c endfile +@c @end group +@end example + +The last step is to call @code{gawk} with the expanded program and the original +options and command line arguments that the user supplied. @code{gawk}'s +exit status is passed back on to @code{igawk}'s calling program. + +@c this causes more problems than it solves, so leave it out. +@ignore +The special file @file{/dev/null} is passed as a data file to @code{gawk} +to handle an interesting case. Suppose that the user's program only has +a @code{BEGIN} rule, and there are no data files to read. The program should exit without reading any data +files. However, suppose that an included library file defines an @code{END} +rule of its own. In this case, @code{gawk} will hang, reading standard +input. In order to avoid this, @file{/dev/null} is explicitly added to the +command line. Reading from @file{/dev/null} always returns an immediate +end of file indication. + +@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh. +@end ignore + +@example +@c @group +@c file eg/prog/igawk.sh +eval gawk -f /tmp/ig.e.$$ $opts -- "$@@" + +exit $? +@c endfile +@c @end group +@end example + +This version of @code{igawk} represents my third attempt at this program. 
+There are three key simplifications that made the program work better. + +@enumerate +@item +Using @samp{@@include} even for the files named with @samp{-f} makes building +the initial collected @code{awk} program much simpler; all the +@samp{@@include} processing can be done once. + +@item +The @code{pathto} function doesn't try to save the line read with +@code{getline} when testing for the file's accessibility. Trying to save +this line for use with the main program complicates things considerably. +@c what problem does this engender though - exercise +@c answer, reading from "-" or /dev/stdin + +@item +Using a @code{getline} loop in the @code{BEGIN} rule does it all in one +place. It is not necessary to call out to a separate loop for processing +nested @samp{@@include} statements. +@end enumerate + +Also, this program illustrates that it is often worthwhile to combine +@code{sh} and @code{awk} programming together. You can usually accomplish +quite a lot, without having to resort to low-level programming in C or C++, and it +is frequently easier to do certain kinds of string and argument manipulation +using the shell than it is in @code{awk}. + +Finally, @code{igawk} shows that it is not always necessary to add new +features to a program; they can often be layered on top. With @code{igawk}, +there is no real reason to build @samp{@@include} processing into +@code{gawk} itself. + +As an additional example of this, consider the idea of having two +files in a directory in the search path. + +@table @file +@item default.awk +This file would contain a set of default library functions, such +as @code{getopt} and @code{assert}. + +@item site.awk +This file would contain library functions that are specific to a site or +installation, i.e.@: locally developed functions. +Having a separate file allows @file{default.awk} to change with +new @code{gawk} releases, without requiring the system administrator to +update it each time by adding the local functions. 
+@end table + +One user +@c Karl Berry, karl@ileaf.com, 10/95 +suggested that @code{gawk} be modified to automatically read these files +upon startup. Instead, it would be very simple to modify @code{igawk} +to do this. Since @code{igawk} can process nested @samp{@@include} +directives, @file{default.awk} could simply contain @samp{@@include} +statements for the desired library functions. + +@c Exercise: make this change + +@node Language History, Gawk Summary, Sample Programs, Top +@chapter The Evolution of the @code{awk} Language + +This @value{DOCUMENT} describes the GNU implementation of @code{awk}, which follows +the POSIX specification. Many @code{awk} users are only familiar +with the original @code{awk} implementation in Version 7 Unix. +(This implementation was the basis for @code{awk} in Berkeley Unix, +through 4.3--Reno. The 4.4 release of Berkeley Unix uses @code{gawk} 2.15.2 +for its version of @code{awk}.) This chapter briefly describes the +evolution of the @code{awk} language, with cross references to other parts +of the @value{DOCUMENT} where you can find more information. + +@menu +* V7/SVR3.1:: The major changes between V7 and System V + Release 3.1. +* SVR4:: Minor changes between System V Releases 3.1 + and 4. +* POSIX:: New features from the POSIX standard. +* BTL:: New features from the Bell Laboratories + version of @code{awk}. +* POSIX/GNU:: The extensions in @code{gawk} not in POSIX + @code{awk}. +@end menu + +@node V7/SVR3.1, SVR4, Language History, Language History +@section Major Changes between V7 and SVR3.1 + +The @code{awk} language evolved considerably between the release of +Version 7 Unix (1978) and the new version first made generally available in +System V Release 3.1 (1987). This section summarizes the changes, with +cross-references to further details. + +@itemize @bullet +@item +The requirement for @samp{;} to separate rules on a line +(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}). 
+ +@item +User-defined functions, and the @code{return} statement +(@pxref{User-defined, ,User-defined Functions}). + +@item +The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}). + +@item +The @code{do}-@code{while} statement +(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}). + +@item +The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand} and +@code{srand} (@pxref{Numeric Functions, ,Numeric Built-in Functions}). + +@item +The built-in functions @code{gsub}, @code{sub}, and @code{match} +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). + +@item +The built-in functions @code{close}, and @code{system} +(@pxref{I/O Functions, ,Built-in Functions for Input/Output}). + +@item +The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART}, +and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}). + +@item +The conditional expression using the ternary operator @samp{?:} +(@pxref{Conditional Exp, ,Conditional Expressions}). + +@item +The exponentiation operator @samp{^} +(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator +form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}). + +@item +C-compatible operator precedence, which breaks some old @code{awk} +programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}). + +@item +Regexps as the value of @code{FS} +(@pxref{Field Separators, ,Specifying How Fields are Separated}), and as the +third argument to the @code{split} function +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). + +@item +Dynamic regexps as operands of the @samp{~} and @samp{!~} operators +(@pxref{Regexp Usage, ,How to Use Regular Expressions}). + +@item +The escape sequences @samp{\b}, @samp{\f}, and @samp{\r} +(@pxref{Escape Sequences}). 
+(Some vendors have updated their old versions of @code{awk} to +recognize @samp{\r}, @samp{\b}, and @samp{\f}, but this is not +something you can rely on.) + +@item +Redirection of input for the @code{getline} function +(@pxref{Getline, ,Explicit Input with @code{getline}}). + +@item +Multiple @code{BEGIN} and @code{END} rules +(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). + +@item +Multi-dimensional arrays +(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}). +@end itemize + +@node SVR4, POSIX, V7/SVR3.1, Language History +@section Changes between SVR3.1 and SVR4 + +@cindex @code{awk} language, V.4 version +The System V Release 4 version of Unix @code{awk} added these features +(some of which originated in @code{gawk}): + +@itemize @bullet +@item +The @code{ENVIRON} variable (@pxref{Built-in Variables}). + +@item +Multiple @samp{-f} options on the command line +(@pxref{Options, ,Command Line Options}). + +@item +The @samp{-v} option for assigning variables before program execution begins +(@pxref{Options, ,Command Line Options}). + +@item +The @samp{--} option for terminating command line options. + +@item +The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences +(@pxref{Escape Sequences}). + +@item +A defined return value for the @code{srand} built-in function +(@pxref{Numeric Functions, ,Numeric Built-in Functions}). + +@item +The @code{toupper} and @code{tolower} built-in string functions +for case translation +(@pxref{String Functions, ,Built-in Functions for String Manipulation}). + +@item +A cleaner specification for the @samp{%c} format-control letter in the +@code{printf} function +(@pxref{Control Letters, ,Format-Control Letters}). + +@item +The ability to dynamically pass the field width and precision (@code{"%*.*d"}) +in the argument list of the @code{printf} function +(@pxref{Control Letters, ,Format-Control Letters}). 
+ +@item +The use of regexp constants such as @code{/foo/} as expressions, where +they are equivalent to using the matching operator, as in @samp{$0 ~ /foo/} +(@pxref{Using Constant Regexps, ,Using Regular Expression Constants}). +@end itemize + +@node POSIX, BTL, SVR4, Language History +@section Changes between SVR4 and POSIX @code{awk} + +The POSIX Command Language and Utilities standard for @code{awk} +introduced the following changes into the language: + +@itemize @bullet +@item +The use of @samp{-W} for implementation-specific options. + +@item +The use of @code{CONVFMT} for controlling the conversion of numbers +to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}). + +@item +The concept of a numeric string, and tighter comparison rules to go +with it (@pxref{Typing and Comparison, ,Variable Typing and Comparison Expressions}). + +@item +More complete documentation of many of the previously undocumented +features of the language. +@end itemize + +The following common extensions are not permitted by the POSIX +standard: + +@c IMPORTANT! Keep this list in sync with the one in node Options + +@itemize @bullet +@item +@code{\x} escape sequences are not recognized +(@pxref{Escape Sequences}). + +@item +Newlines do not act as whitespace to separate fields when @code{FS} is +equal to a single space. + +@item +The synonym @code{func} for the keyword @code{function} is not +recognized (@pxref{Definition Syntax, ,Function Definition Syntax}). + +@item +The operators @samp{**} and @samp{**=} cannot be used in +place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators}, +and also @pxref{Assignment Ops, ,Assignment Expressions}). + +@item +Specifying @samp{-Ft} on the command line does not set the value +of @code{FS} to be a single tab character +(@pxref{Field Separators, ,Specifying How Fields are Separated}). + +@item +The @code{fflush} built-in function is not supported +(@pxref{I/O Functions, , Built-in Functions for Input/Output}). 
+@end itemize + +@node BTL, POSIX/GNU, POSIX, Language History +@section Extensions in the Bell Laboratories @code{awk} + +@cindex Kernighan, Brian +Brian Kernighan, one of the original designers of Unix @code{awk}, +has made his version available via anonymous @code{ftp} +(@pxref{Other Versions, ,Other Freely Available @code{awk} Implementations}). +This section describes extensions in his version of @code{awk} that are +not in POSIX @code{awk}. + +@itemize @bullet +@item +The @samp{-mf @var{NNN}} and @samp{-mr @var{NNN}} command line options +to set the maximum number of fields, and the maximum +record size, respectively +(@pxref{Options, ,Command Line Options}). + +@item +The @code{fflush} built-in function for flushing buffered output +(@pxref{I/O Functions, ,Built-in Functions for Input/Output}). + +@ignore +@item +The @code{SYMTAB} array, that allows access to the internal symbol +table of @code{awk}. This feature is not documented, largely because +it is somewhat shakily implemented. For instance, you cannot access arrays +or array elements through it. +@end ignore +@end itemize + +@node POSIX/GNU, , BTL, Language History +@section Extensions in @code{gawk} Not in POSIX @code{awk} + +@cindex compatibility mode +The GNU implementation, @code{gawk}, adds a number of features. +This section lists them in the order they were added to @code{gawk}. +They can all be disabled with either the @samp{--traditional} or +@samp{--posix} options +(@pxref{Options, ,Command Line Options}). + +Version 2.10 of @code{gawk} introduced these features: + +@itemize @bullet +@item +The @code{AWKPATH} environment variable for specifying a path search for +the @samp{-f} command line option +(@pxref{Options, ,Command Line Options}). + +@item +The @code{IGNORECASE} variable and its effects +(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}). 
+ +@item +The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and +@file{/dev/fd/@var{n}} file name interpretation +(@pxref{Special Files, ,Special File Names in @code{gawk}}). +@end itemize + +Version 2.13 of @code{gawk} introduced these features: + +@itemize @bullet +@item +The @code{FIELDWIDTHS} variable and its effects +(@pxref{Constant Size, ,Reading Fixed-width Data}). + +@item +The @code{systime} and @code{strftime} built-in functions for obtaining +and printing time stamps +(@pxref{Time Functions, ,Functions for Dealing with Time Stamps}). + +@item +The @samp{-W lint} option to provide source code and run time error +and portability checking +(@pxref{Options, ,Command Line Options}). + +@item +The @samp{-W compat} option to turn off these extensions +(@pxref{Options, ,Command Line Options}). + +@item +The @samp{-W posix} option for full POSIX compliance +(@pxref{Options, ,Command Line Options}). +@end itemize + +Version 2.14 of @code{gawk} introduced these features: + +@itemize @bullet +@item +The @code{next file} statement for skipping to the next data file +(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}). +@end itemize + +Version 2.15 of @code{gawk} introduced these features: + +@itemize @bullet +@item +The @code{ARGIND} variable, that tracks the movement of @code{FILENAME} +through @code{ARGV} (@pxref{Built-in Variables}). + +@item +The @code{ERRNO} variable, that contains the system error message when +@code{getline} returns @minus{}1, or when @code{close} fails +(@pxref{Built-in Variables}). + +@item +The ability to use GNU-style long named options that start with @samp{--} +(@pxref{Options, ,Command Line Options}). + +@item +The @samp{--source} option for mixing command line and library +file source code +(@pxref{Options, ,Command Line Options}). + +@item +The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and +@file{/dev/user} file name interpretation +(@pxref{Special Files, ,Special File Names in @code{gawk}}). 
+@end itemize + +Version 3.0 of @code{gawk} introduced these features: + +@itemize @bullet +@item +The @code{next file} statement became @code{nextfile} +(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}). + +@item +The @samp{--lint-old} option to +warn about constructs that are not available in +the original Version 7 Unix version of @code{awk} +(@pxref{V7/SVR3.1, , Major Changes between V7 and SVR3.1}). + +@item +The @samp{--traditional} option was added as a better name for +@samp{--compat} (@pxref{Options, ,Command Line Options}). + +@item +The ability for @code{FS} to be a null string, and for the third +argument to @code{split} to be the null string +(@pxref{Single Character Fields, , Making Each Character a Separate Field}). + +@item +The ability for @code{RS} to be a regexp +(@pxref{Records, , How Input is Split into Records}). + +@item +The @code{RT} variable +(@pxref{Records, , How Input is Split into Records}). + +@item +The @code{gensub} function for more powerful text manipulation +(@pxref{String Functions, , Built-in Functions for String Manipulation}). + +@item +The @code{strftime} function acquired a default time format, +allowing it to be called with no arguments +(@pxref{Time Functions, , Functions for Dealing with Time Stamps}). + +@item +Full support for both POSIX and GNU regexps +(@pxref{Regexp, , Regular Expressions}). + +@item +The @samp{--re-interval} option to provide interval expressions in regexps +(@pxref{Regexp Operators, , Regular Expression Operators}). + +@item +@code{IGNORECASE} changed, now applying to string comparison as well +as regexp operations +(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}). + +@item +The @samp{-m} option and the @code{fflush} function from the +Bell Labs research version of @code{awk} +(@pxref{Options, ,Command Line Options}; also +@pxref{I/O Functions, ,Built-in Functions for Input/Output}). 
+
+@item
+The use of GNU Autoconf to control the configuration process
+(@pxref{Quick Installation, , Compiling @code{gawk} for Unix}).
+
+@item
+Amiga support
+(@pxref{Amiga Installation, ,Installing @code{gawk} on an Amiga}).
+
+@c XXX ADD MORE STUFF HERE
+
+@end itemize
+
+@node Gawk Summary, Installation, Language History, Top
+@appendix @code{gawk} Summary
+
+This appendix provides a brief summary of the @code{gawk} command line and the
+@code{awk} language. It is designed to serve as a ``quick reference.'' It is
+therefore terse, but complete.
+
+@menu
+* Command Line Summary:: Recapitulation of the command line.
+* Language Summary:: A terse review of the language.
+* Variables/Fields:: Variables, fields, and arrays.
+* Rules Summary:: Patterns and Actions, and their component
+ parts.
+* Actions Summary:: Quick overview of actions.
+* Functions Summary:: Defining and calling functions.
+* Historical Features:: Some undocumented but supported ``features''.
+@end menu
+
+@node Command Line Summary, Language Summary, Gawk Summary, Gawk Summary
+@appendixsec Command Line Options Summary
+
+The command line consists of options to @code{gawk} itself, the
+@code{awk} program text (if not supplied via the @samp{-f} option), and
+values to be made available in the @code{ARGC} and @code{ARGV}
+predefined @code{awk} variables:
+
+@example
+gawk @r{[@var{POSIX or GNU style options}]} -f @var{source-file} @r{[@code{--}]} @var{file} @dots{}
+gawk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+The options that @code{gawk} accepts are:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator @var{fs}
+Use @var{fs} for the input field separator (the value of the @code{FS}
+predefined variable).
+
+@item -f @var{program-file}
+@itemx --file @var{program-file}
+Read the @code{awk} program source from the file @var{program-file}, instead
+of from the first command line argument. 
+ +@item -mf @var{NNN} +@itemx -mr @var{NNN} +The @samp{f} flag sets +the maximum number of fields, and the @samp{r} flag sets the maximum +record size. These options are ignored by @code{gawk}, since @code{gawk} +has no predefined limits; they are only for compatibility with the +Bell Labs research version of Unix @code{awk}. + +@item -v @var{var}=@var{val} +@itemx --assign @var{var}=@var{val} +Assign the variable @var{var} the value @var{val} before program execution +begins. + +@item -W traditional +@itemx -W compat +@itemx --traditional +@itemx --compat +Use compatibility mode, in which @code{gawk} extensions are turned +off. + +@item -W copyleft +@itemx -W copyright +@itemx --copyleft +@itemx --copyright +Print the short version of the General Public License on the standard +output, and exit. This option may disappear in a future version of @code{gawk}. + +@item -W help +@itemx -W usage +@itemx --help +@itemx --usage +Print a relatively short summary of the available options on the standard +output, and exit. + +@item -W lint +@itemx --lint +Give warnings about dubious or non-portable @code{awk} constructs. + +@item -W lint-old +@itemx --lint-old +Warn about constructs that are not available in +the original Version 7 Unix version of @code{awk}. + +@item -W posix +@itemx --posix +Use POSIX compatibility mode, in which @code{gawk} extensions +are turned off and additional restrictions apply. + +@item -W re-interval +@itemx --re-interval +Allow interval expressions +(@pxref{Regexp Operators, , Regular Expression Operators}), +in regexps. + +@item -W source=@var{program-text} +@itemx --source @var{program-text} +Use @var{program-text} as @code{awk} program source code. This option allows +mixing command line source code with source code from files, and is +particularly useful for mixing command line programs with library functions. + +@item -W version +@itemx --version +Print version information for this particular copy of @code{gawk} on the error +output. 
+ +@item -- +Signal the end of options. This is useful to allow further arguments to the +@code{awk} program itself to start with a @samp{-}. This is mainly for +consistency with POSIX argument parsing conventions. +@end table + +Any other options are flagged as invalid, but are otherwise ignored. +@xref{Options, ,Command Line Options}, for more details. + +@node Language Summary, Variables/Fields, Command Line Summary, Gawk Summary +@appendixsec Language Summary + +An @code{awk} program consists of a sequence of zero or more pattern-action +statements and optional function definitions. One or the other of the +pattern and action may be omitted. + +@example +@var{pattern} @{ @var{action statements} @} +@var{pattern} + @{ @var{action statements} @} + +function @var{name}(@var{parameter list}) @{ @var{action statements} @} +@end example + +@code{gawk} first reads the program source from the +@var{program-file}(s), if specified, or from the first non-option +argument on the command line. The @samp{-f} option may be used multiple +times on the command line. @code{gawk} reads the program text from all +the @var{program-file} files, effectively concatenating them in the +order they are specified. This is useful for building libraries of +@code{awk} functions, without having to include them in each new +@code{awk} program that uses them. To use a library function in a file +from a program typed in on the command line, specify +@samp{--source '@var{program}'}, and type your program in between the single +quotes. +@xref{Options, ,Command Line Options}. + +The environment variable @code{AWKPATH} specifies a search path to use +when finding source files named with the @samp{-f} option. The default +path, which is +@samp{.:/usr/local/share/awk}@footnote{The path may use a directory +other than @file{/usr/local/share/awk}, depending upon how @code{gawk} +was built and installed.} is used if @code{AWKPATH} is not set. 
+If a file name given to the @samp{-f} option contains a @samp{/} character, +no path search is performed. +@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}. + +@code{gawk} compiles the program into an internal form, and then proceeds to +read each file named in the @code{ARGV} array. +The initial values of @code{ARGV} come from the command line arguments. +If there are no files named +on the command line, @code{gawk} reads the standard input. + +If a ``file'' named on the command line has the form +@samp{@var{var}=@var{val}}, it is treated as a variable assignment: the +variable @var{var} is assigned the value @var{val}. +If any of the files have a value that is the null string, that +element in the list is skipped. + +For each record in the input, @code{gawk} tests to see if it matches any +@var{pattern} in the @code{awk} program. For each pattern that the record +matches, the associated @var{action} is executed. + +@node Variables/Fields, Rules Summary, Language Summary, Gawk Summary +@appendixsec Variables and Fields + +@code{awk} variables are not declared; they come into existence when they are +first used. Their values are either floating-point numbers or strings. +@code{awk} also has one-dimensional arrays; multiple-dimensional arrays +may be simulated. There are several predefined variables that +@code{awk} sets as a program runs; these are summarized below. + +@menu +* Fields Summary:: Input field splitting. +* Built-in Summary:: @code{awk}'s built-in variables. +* Arrays Summary:: Using arrays. +* Data Type Summary:: Values in @code{awk} are numbers or strings. +@end menu + +@node Fields Summary, Built-in Summary, Variables/Fields, Variables/Fields +@appendixsubsec Fields + +As each input line is read, @code{gawk} splits the line into +@var{fields}, using the value of the @code{FS} variable as the field +separator. If @code{FS} is a single character, fields are separated by +that character. 
Otherwise, @code{FS} is expected to be a full regular +expression. In the special case that @code{FS} is a single space, +fields are separated by runs of spaces, tabs and/or newlines.@footnote{In +POSIX @code{awk}, newline does not separate fields.} +If @code{FS} is the null string (@code{""}), then each individual +character in the record becomes a separate field. +Note that the value +of @code{IGNORECASE} (@pxref{Case-sensitivity, ,Case-sensitivity in Matching}) +also affects how fields are split when @code{FS} is a regular expression. + +Each field in the input line may be referenced by its position, @code{$1}, +@code{$2}, and so on. @code{$0} is the whole line. The value of a field may +be assigned to as well. Field numbers need not be constants: + +@example +n = 5 +print $n +@end example + +@noindent +prints the fifth field in the input line. The variable @code{NF} is set to +the total number of fields in the input line. + +References to non-existent fields (i.e.@: fields after @code{$NF}) return +the null string. However, assigning to a non-existent field (e.g., +@code{$(NF+2) = 5}) increases the value of @code{NF}, creates any +intervening fields with the null string as their value, and causes the +value of @code{$0} to be recomputed, with the fields being separated by +the value of @code{OFS}. +Decrementing @code{NF} causes the values of fields past the new value to +be lost, and the value of @code{$0} to be recomputed, with the fields being +separated by the value of @code{OFS}. +@xref{Reading Files, ,Reading Input Files}. + +@node Built-in Summary, Arrays Summary, Fields Summary, Variables/Fields +@appendixsubsec Built-in Variables + +@code{gawk}'s built-in variables are: + +@table @code +@item ARGC +The number of elements in @code{ARGV}. See below for what is actually +included in @code{ARGV}. + +@item ARGIND +The index in @code{ARGV} of the current file being processed. 
+When @code{gawk} is processing the input data files, +it is always true that @samp{FILENAME == ARGV[ARGIND]}. + +@item ARGV +The array of command line arguments. The array is indexed from zero to +@code{ARGC} @minus{} 1. Dynamically changing @code{ARGC} and +the contents of @code{ARGV} +can control the files used for data. A null-valued element in +@code{ARGV} is ignored. @code{ARGV} does not include the options to +@code{awk} or the text of the @code{awk} program itself. + +@item CONVFMT +The conversion format to use when converting numbers to strings. + +@item FIELDWIDTHS +A space separated list of numbers describing the fixed-width input data. + +@item ENVIRON +An array of environment variable values. The array +is indexed by variable name, each element being the value of that +variable. Thus, the environment variable @code{HOME} is +@code{ENVIRON["HOME"]}. One possible value might be @file{/home/arnold}. + +Changing this array does not affect the environment seen by programs +which @code{gawk} spawns via redirection or the @code{system} function. +(This may change in a future version of @code{gawk}.) + +Some operating systems do not have environment variables. +The @code{ENVIRON} array is empty when running on these systems. + +@item ERRNO +The system error message when an error occurs using @code{getline} +or @code{close}. + +@item FILENAME +The name of the current input file. If no files are specified on the command +line, the value of @code{FILENAME} is the null string. + +@item FNR +The input record number in the current input file. + +@item FS +The input field separator, a space by default. + +@item IGNORECASE +The case-sensitivity flag for string comparisons and regular expression +operations. 
If @code{IGNORECASE} has a non-zero value, then pattern +matching in rules, record separating with @code{RS}, field splitting +with @code{FS}, regular expression matching with @samp{~} and +@samp{!~}, and the @code{gensub}, @code{gsub}, @code{index}, +@code{match}, @code{split} and @code{sub} built-in functions all +ignore case when doing regular expression operations, and all string +comparisons are done ignoring case. +The value of @code{IGNORECASE} does @emph{not} affect array subscripting. + +@item NF +The number of fields in the current input record. + +@item NR +The total number of input records seen so far. + +@item OFMT +The output format for numbers for the @code{print} statement, +@code{"%.6g"} by default. + +@item OFS +The output field separator, a space by default. + +@item ORS +The output record separator, by default a newline. + +@item RS +The input record separator, by default a newline. +If @code{RS} is set to the null string, then records are separated by +blank lines. When @code{RS} is set to the null string, then the newline +character always acts as a field separator, in addition to whatever value +@code{FS} may have. If @code{RS} is set to a multi-character +string, it denotes a regexp; input text matching the regexp +separates records. + +@item RT +The input text that matched the text denoted by @code{RS}, +the record separator. + +@item RSTART +The index of the first character last matched by @code{match}; zero if no match. + +@item RLENGTH +The length of the string last matched by @code{match}; @minus{}1 if no match. + +@item SUBSEP +The string used to separate multiple subscripts in array elements, by +default @code{"\034"}. +@end table + +@xref{Built-in Variables}, for more information. + +@node Arrays Summary, Data Type Summary, Built-in Summary, Variables/Fields +@appendixsubsec Arrays + +Arrays are subscripted with an expression between square brackets +(@samp{[} and @samp{]}). 
Array subscripts are @emph{always} strings; +numbers are converted to strings as necessary, following the standard +conversion rules +(@pxref{Conversion, ,Conversion of Strings and Numbers}). + +If you use multiple expressions separated by commas inside the square +brackets, then the array subscript is a string consisting of the +concatenation of the individual subscript values, converted to strings, +separated by the subscript separator (the value of @code{SUBSEP}). + +The special operator @code{in} may be used in a conditional context +to see if an array has an index consisting of a particular value. + +@example +if (val in array) + print array[val] +@end example + +If the array has multiple subscripts, use @samp{(i, j, @dots{}) in @var{array}} +to test for existence of an element. + +The @code{in} construct may also be used in a @code{for} loop to iterate +over all the elements of an array. +@xref{Scanning an Array, ,Scanning All Elements of an Array}. + +You can remove an element from an array using the @code{delete} statement. + +You can clear an entire array using @samp{delete @var{array}}. + +@xref{Arrays, ,Arrays in @code{awk}}. + +@node Data Type Summary, , Arrays Summary, Variables/Fields +@appendixsubsec Data Types + +The value of an @code{awk} expression is always either a number +or a string. + +Some contexts (such as arithmetic operators) require numeric +values. They convert strings to numbers by interpreting the text +of the string as a number. If the string does not look like a +number, it converts to zero. + +Other contexts (such as concatenation) require string values. +They convert numbers to strings by effectively printing them +with @code{sprintf}. +@xref{Conversion, ,Conversion of Strings and Numbers}, for the details. + +To force conversion of a string value to a number, simply add zero +to it. If the value you start with is already a number, this +does not change it. 
+ +To force conversion of a numeric value to a string, concatenate it with +the null string. + +Comparisons are done numerically if both operands are numeric, or if +one is numeric and the other is a numeric string. Otherwise one or +both operands are converted to strings and a string comparison is +performed. Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} +elements, @code{ENVIRON} elements and the elements of an array created +by @code{split} are the only items that can be numeric strings. String +constants, such as @code{"3.1415927"} are not numeric strings, they are +string constants. The full rules for comparisons are described in +@ref{Typing and Comparison, ,Variable Typing and Comparison Expressions}. + +Uninitialized variables have the string value @code{""} (the null, or +empty, string). In contexts where a number is required, this is +equivalent to zero. + +@xref{Variables}, for more information on variable naming and initialization; +@pxref{Conversion, ,Conversion of Strings and Numbers}, for more information +on how variable values are interpreted. + +@node Rules Summary, Actions Summary, Variables/Fields, Gawk Summary +@appendixsec Patterns + +@menu +* Pattern Summary:: Quick overview of patterns. +* Regexp Summary:: Quick overview of regular expressions. +@end menu + +An @code{awk} program is mostly composed of rules, each consisting of a +pattern followed by an action. The action is enclosed in @samp{@{} and +@samp{@}}. Either the pattern may be missing, or the action may be +missing, but not both. If the pattern is missing, the +action is executed for every input record. A missing action is +equivalent to @samp{@w{@{ print @}}}, which prints the entire line. + +@c These paragraphs repeated for both patterns and actions. I don't +@c like this, but I also don't see any way around it. Update both copies +@c if they need fixing. +Comments begin with the @samp{#} character, and continue until the end of the +line. 
Blank lines may be used to separate statements. Statements normally +end with a newline; however, this is not the case for lines ending in a +@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines +ending in @code{do} or @code{else} also have their statements automatically +continued on the following line. In other cases, a line can be continued by +ending it with a @samp{\}, in which case the newline is ignored. + +Multiple statements may be put on one line by separating each one with +a @samp{;}. +This applies to both the statements within the action part of a rule (the +usual case), and to the rule statements. + +@xref{Comments, ,Comments in @code{awk} Programs}, for information on +@code{awk}'s commenting convention; +@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a +description of the line continuation mechanism in @code{awk}. + +@node Pattern Summary, Regexp Summary, Rules Summary, Rules Summary +@appendixsubsec Pattern Summary + +@code{awk} patterns may be one of the following: + +@example +/@var{regular expression}/ +@var{relational expression} +@var{pattern} && @var{pattern} +@var{pattern} || @var{pattern} +@var{pattern} ? @var{pattern} : @var{pattern} +(@var{pattern}) +! @var{pattern} +@var{pattern1}, @var{pattern2} +BEGIN +END +@end example + +@code{BEGIN} and @code{END} are two special kinds of patterns that are not +tested against the input. The action parts of all @code{BEGIN} rules are +concatenated as if all the statements had been written in a single @code{BEGIN} +rule. They are executed before any of the input is read. Similarly, all the +@code{END} rules are concatenated, and executed when all the input is exhausted (or +when an @code{exit} statement is executed). @code{BEGIN} and @code{END} +patterns cannot be combined with other patterns in pattern expressions. +@code{BEGIN} and @code{END} rules cannot have missing action parts. 
+ +For @code{/@var{regular-expression}/} patterns, the associated statement is +executed for each input record that matches the regular expression. Regular +expressions are summarized below. + +A @var{relational expression} may use any of the operators defined below in +the section on actions. These generally test whether certain fields match +certain regular expressions. + +The @samp{&&}, @samp{||}, and @samp{!} operators are logical ``and,'' +logical ``or,'' and logical ``not,'' respectively, as in C. They do +short-circuit evaluation, also as in C, and are used for combining more +primitive pattern expressions. As in most languages, parentheses may be +used to change the order of evaluation. + +The @samp{?:} operator is like the same operator in C. If the first +pattern matches, then the second pattern is matched against the input +record; otherwise, the third is matched. Only one of the second and +third patterns is matched. + +The @samp{@var{pattern1}, @var{pattern2}} form of a pattern is called a +range pattern. It matches all input lines starting with a line that +matches @var{pattern1}, and continuing until a line that matches +@var{pattern2}, inclusive. A range pattern cannot be used as an operand +of any of the pattern operators. + +@xref{Pattern Overview, ,Pattern Elements}. + +@node Regexp Summary, , Pattern Summary, Rules Summary +@appendixsubsec Regular Expressions + +Regular expressions are based on POSIX EREs (extended regular expressions). +The escape sequences allowed in string constants are also valid in +regular expressions (@pxref{Escape Sequences}). +Regexps are composed of characters as follows: + +@table @code +@item @var{c} +matches the character @var{c} (assuming @var{c} is none of the characters +listed below). + +@item \@var{c} +matches the literal character @var{c}. + +@item . +matches any character, @emph{including} newline. 
+In strict POSIX mode, @samp{.} does not match the @sc{nul} +character, which is a character with all bits equal to zero. + +@item ^ +matches the beginning of a string. + +@item $ +matches the end of a string. + +@item [@var{abc}@dots{}] +matches any of the characters @var{abc}@dots{} (character list). + +@item [[:@var{class}:]] +matches any character in the character class @var{class}. Allowable classes +are @code{alnum}, @code{alpha}, @code{blank}, @code{cntrl}, +@code{digit}, @code{graph}, @code{lower}, @code{print}, @code{punct}, +@code{space}, @code{upper}, and @code{xdigit}. + +@item [[.@var{symbol}.]] +matches the multi-character collating symbol @var{symbol}. +@code{gawk} does not currently support collating symbols. + +@item [[=@var{classname}=]] +matches any of the equivalent characters in the current locale named by the +equivalence class @var{classname}. +@code{gawk} does not currently support equivalence classes. + +@item [^@var{abc}@dots{}] +matches any character except @var{abc}@dots{} (negated +character list). + +@item @var{r1}|@var{r2} +matches either @var{r1} or @var{r2} (alternation). + +@item @var{r1r2} +matches @var{r1}, and then @var{r2} (concatenation). + +@item @var{r}+ +matches one or more @var{r}'s. + +@item @var{r}* +matches zero or more @var{r}'s. + +@item @var{r}? +matches zero or one @var{r}'s. + +@item (@var{r}) +matches @var{r} (grouping). + +@item @var{r}@{@var{n}@} +@itemx @var{r}@{@var{n},@} +@itemx @var{r}@{@var{n},@var{m}@} +matches at least @var{n}, @var{n} to any number, or @var{n} to @var{m} +occurrences of @var{r} (interval expressions). + +@item \y +matches the empty string at either the beginning or the +end of a word. + +@item \B +matches the empty string within a word. + +@item \< +matches the empty string at the beginning of a word. + +@item \> +matches the empty string at the end of a word. + +@item \w +matches any word-constituent character (alphanumeric characters and +the underscore). 
+
+@item \W
+matches any character that is not word-constituent.
+
+@item \`
+matches the empty string at the beginning of a buffer (same as a string
+in @code{gawk}).
+
+@item \'
+matches the empty string at the end of a buffer.
+@end table
+
+The various command line options
+control how @code{gawk} interprets characters in regexps.
+
+@c NOTE!!! Keep this in sync with the same table in the regexp chapter!
+@table @asis
+@item No options
+In the default case, @code{gawk} provides all the facilities of
+POSIX regexps and the GNU regexp operators described above.
+However, interval expressions are not supported.
+
+@item @code{--posix}
+Only POSIX regexps are supported, the GNU operators are not special
+(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions
+are allowed.
+
+@item @code{--traditional}
+Traditional Unix @code{awk} regexps are matched. The GNU operators
+are not special, interval expressions are not available, and neither
+are the POSIX character classes (@code{[[:alnum:]]} and so on).
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+
+@item @code{--re-interval}
+Allow interval expressions in regexps, even if @samp{--traditional}
+has been provided.
+@end table
+
+@xref{Regexp, ,Regular Expressions}.
+
+@node Actions Summary, Functions Summary, Rules Summary, Gawk Summary
+@appendixsec Actions
+
+Action statements are enclosed in braces, @samp{@{} and @samp{@}}.
+A missing action statement is equivalent to @samp{@w{@{ print @}}}.
+
+Action statements consist of the usual assignment, conditional, and looping
+statements found in most languages. The operators, control statements,
+and Input/Output statements available are similar to those in C.
+
+@c These paragraphs repeated for both patterns and actions. I don't
+@c like this, but I also don't see any way around it. Update both copies
+@c if they need fixing. 
+Comments begin with the @samp{#} character, and continue until the end of the +line. Blank lines may be used to separate statements. Statements normally +end with a newline; however, this is not the case for lines ending in a +@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines +ending in @code{do} or @code{else} also have their statements automatically +continued on the following line. In other cases, a line can be continued by +ending it with a @samp{\}, in which case the newline is ignored. + +Multiple statements may be put on one line by separating each one with +a @samp{;}. +This applies to both the statements within the action part of a rule (the +usual case), and to the rule statements. + +@xref{Comments, ,Comments in @code{awk} Programs}, for information on +@code{awk}'s commenting convention; +@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a +description of the line continuation mechanism in @code{awk}. + +@menu +* Operator Summary:: @code{awk} operators. +* Control Flow Summary:: The control statements. +* I/O Summary:: The I/O statements. +* Printf Summary:: A summary of @code{printf}. +* Special File Summary:: Special file names interpreted internally. +* Built-in Functions Summary:: Built-in numeric and string functions. +* Time Functions Summary:: Built-in time functions. +* String Constants Summary:: Escape sequences in strings. +@end menu + +@node Operator Summary, Control Flow Summary, Actions Summary, Actions Summary +@appendixsubsec Operators + +The operators in @code{awk}, in order of decreasing precedence, are: + +@table @code +@item (@dots{}) +Grouping. + +@item $ +Field reference. + +@item ++ -- +Increment and decrement, both prefix and postfix. + +@item ^ +Exponentiation (@samp{**} may also be used, and @samp{**=} for the assignment +operator, but they are not specified in the POSIX standard). + +@item + - ! +Unary plus, unary minus, and logical negation. 
+ +@item * / % +Multiplication, division, and modulus. + +@item + - +Addition and subtraction. + +@item @var{space} +String concatenation. + +@item < <= > >= != == +The usual relational operators. + +@item ~ !~ +Regular expression match, negated match. + +@item in +Array membership. + +@item && +Logical ``and''. + +@item || +Logical ``or''. + +@item ?: +A conditional expression. This has the form @samp{@var{expr1} ? +@var{expr2} : @var{expr3}}. If @var{expr1} is true, the value of the +expression is @var{expr2}; otherwise it is @var{expr3}. Only one of +@var{expr2} and @var{expr3} is evaluated. + +@item = += -= *= /= %= ^= +Assignment. Both absolute assignment (@code{@var{var}=@var{value}}) +and operator assignment (the other forms) are supported. +@end table + +@xref{Expressions}. + +@node Control Flow Summary, I/O Summary, Operator Summary, Actions Summary +@appendixsubsec Control Statements + +The control statements are as follows: + +@example +if (@var{condition}) @var{statement} @r{[} else @var{statement} @r{]} +while (@var{condition}) @var{statement} +do @var{statement} while (@var{condition}) +for (@var{expr1}; @var{expr2}; @var{expr3}) @var{statement} +for (@var{var} in @var{array}) @var{statement} +break +continue +delete @var{array}[@var{index}] +delete @var{array} +exit @r{[} @var{expression} @r{]} +@{ @var{statements} @} +@end example + +@xref{Statements, ,Control Statements in Actions}. + +@node I/O Summary, Printf Summary, Control Flow Summary, Actions Summary +@appendixsubsec I/O Statements + +The Input/Output statements are as follows: + +@table @code +@item getline +Set @code{$0} from next input record; set @code{NF}, @code{NR}, @code{FNR}. +@xref{Getline, ,Explicit Input with @code{getline}}. + +@item getline <@var{file} +Set @code{$0} from next record of @var{file}; set @code{NF}. + +@item getline @var{var} +Set @var{var} from next input record; set @code{NR}, @code{FNR}. 
+
+@item getline @var{var} <@var{file}
+Set @var{var} from next record of @var{file}.
+
+@item @var{command} | getline
+Run @var{command}, piping its output into @code{getline}; sets @code{$0},
+@code{NF}, @code{NR}.
+
+@item @var{command} | getline @var{var}
+Run @var{command}, piping its output into @code{getline}; sets @var{var}.
+
+@item next
+Stop processing the current input record. The next input record is read and
+processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+@xref{Next Statement, ,The @code{next} Statement}.
+
+@item nextfile
+Stop processing the current input file. The next input record read comes
+from the next input file. @code{FILENAME} is updated, @code{FNR} is set to one,
+@code{ARGIND} is incremented,
+and processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+Earlier versions of @code{gawk} used @samp{next file}; this usage is still
+supported, but is considered to be deprecated.
+@xref{Nextfile Statement, ,The @code{nextfile} Statement}.
+
+@item print
+Prints the current record.
+@xref{Printing, ,Printing Output}.
+
+@item print @var{expr-list}
+Prints expressions.
+
+@item print @var{expr-list} > @var{file}
+Prints expressions to @var{file}. If @var{file} does not exist, it is
+created. If it does exist, its contents are deleted the first time the
+@code{print} is executed.
+
+@item print @var{expr-list} >> @var{file}
+Prints expressions to @var{file}. The previous contents of @var{file}
+are retained, and the output of @code{print} is appended to the file.
+
+@item print @var{expr-list} | @var{command}
+Prints expressions, sending the output down a pipe to @var{command}.
+The pipeline to the command stays open until the @code{close} function
+is called.
+
+@item printf @var{fmt, expr-list}
+Format and print.
+
+@item printf @var{fmt, expr-list} > @var{file}
+Format and print to @var{file}. If @var{file} does not exist, it is
+created. If it does exist, its contents are deleted the first time the
+@code{printf} is executed.
+
+@item printf @var{fmt, expr-list} >> @var{file}
+Format and print to @var{file}. The previous contents of @var{file}
+are retained, and the output of @code{printf} is appended to the file.
+
+@item printf @var{fmt, expr-list} | @var{command}
+Format and print, sending the output down a pipe to @var{command}.
+The pipeline to the command stays open until the @code{close} function
+is called.
+@end table
+
+@code{getline} returns zero on end of file, and @minus{}1 on an error.
+In the event of an error, @code{getline} will set @code{ERRNO} to
+the value of a system-dependent string that describes the error.
+
+@node Printf Summary, Special File Summary, I/O Summary, Actions Summary
+@appendixsubsec @code{printf} Summary
+
+Conversion specifications have the form
+@code{%}[@var{flag}][@var{width}][@code{.}@var{prec}]@var{format}.
+@c whew!
+Items in brackets are optional.
+
+The @code{awk} @code{printf} statement and @code{sprintf} function
+accept the following conversion specification formats:
+
+@table @code
+@item %c
+An ASCII character. If the argument used for @samp{%c} is numeric, it is
+treated as a character and printed. Otherwise, the argument is assumed to
+be a string, and only the first character of that string is printed.
+
+@item %d
+@itemx %i
+A decimal number (the integer part).
+
+@item %e
+@itemx %E
+A floating point number of the form
+@samp{@r{[}-@r{]}d.dddddde@r{[}+-@r{]}dd}.
+The @samp{%E} format uses @samp{E} instead of @samp{e}.
+
+@item %f
+A floating point number of the form
+@r{[}@code{-}@r{]}@code{ddd.dddddd}.
+
+@item %g
+@itemx %G
+Use either the @samp{%e} or @samp{%f} formats, whichever produces a shorter
+string, with non-significant zeros suppressed.
+@samp{%G} will use @samp{%E} instead of @samp{%e}.
+ +@item %o +An unsigned octal number (again, an integer). + +@item %s +A character string. + +@item %x +@itemx %X +An unsigned hexadecimal number (an integer). +The @samp{%X} format uses @samp{A} through @samp{F} instead of +@samp{a} through @samp{f} for decimal 10 through 15. + +@item %% +A single @samp{%} character; no argument is converted. +@end table + +There are optional, additional parameters that may lie between the @samp{%} +and the control letter: + +@table @code +@item - +The expression should be left-justified within its field. + +@item @var{space} +For numeric conversions, prefix positive values with a space, and +negative values with a minus sign. + +@item + +The plus sign, used before the width modifier (see below), +says to always supply a sign for numeric conversions, even if the data +to be formatted is positive. The @samp{+} overrides the space modifier. + +@item # +Use an ``alternate form'' for certain control letters. +For @samp{o}, supply a leading zero. +For @samp{x}, and @samp{X}, supply a leading @samp{0x} or @samp{0X} for +a non-zero result. +For @samp{e}, @samp{E}, and @samp{f}, the result will always contain a +decimal point. +For @samp{g}, and @samp{G}, trailing zeros are not removed from the result. + +@item 0 +A leading @samp{0} (zero) acts as a flag, that indicates output should be +padded with zeros instead of spaces. +This applies even to non-numeric output formats. +This flag only has an effect when the field width is wider than the +value to be printed. + +@item @var{width} +The field should be padded to this width. The field is normally padded +with spaces. If the @samp{0} flag has been used, it is padded with zeros. + +@item .@var{prec} +A number that specifies the precision to use when printing. +For the @samp{e}, @samp{E}, and @samp{f} formats, this specifies the +number of digits you want printed to the right of the decimal point. 
+For the @samp{g}, and @samp{G} formats, it specifies the maximum number +of significant digits. For the @samp{d}, @samp{o}, @samp{i}, @samp{u}, +@samp{x}, and @samp{X} formats, it specifies the minimum number of +digits to print. For the @samp{s} format, it specifies the maximum number of +characters from the string that should be printed. +@end table + +Either or both of the @var{width} and @var{prec} values may be specified +as @samp{*}. In that case, the particular value is taken from the argument +list. + +@xref{Printf, ,Using @code{printf} Statements for Fancier Printing}. + +@node Special File Summary, Built-in Functions Summary, Printf Summary, Actions Summary +@appendixsubsec Special File Names + +When doing I/O redirection from either @code{print} or @code{printf} into a +file, or via @code{getline} from a file, @code{gawk} recognizes certain special +file names internally. These file names allow access to open file descriptors +inherited from @code{gawk}'s parent process (usually the shell). The +file names are: + +@table @file +@item /dev/stdin +The standard input. + +@item /dev/stdout +The standard output. + +@item /dev/stderr +The standard error output. + +@item /dev/fd/@var{n} +The file denoted by the open file descriptor @var{n}. +@end table + +In addition, reading the following files provides process related information +about the running @code{gawk} program. All returned records are terminated +with a newline. + +@table @file +@item /dev/pid +Returns the process ID of the current process. + +@item /dev/ppid +Returns the parent process ID of the current process. + +@item /dev/pgrpid +Returns the process group ID of the current process. + +@item /dev/user +At least four space-separated fields, containing the return values of +the @code{getuid}, @code{geteuid}, @code{getgid}, and @code{getegid} +system calls. +If there are any additional fields, they are the group IDs returned by +@code{getgroups} system call. 
+(Multiple groups may not be supported on all systems.)
+@end table
+
+@noindent
+These file names may also be used on the command line to name data files.
+These file names are only recognized internally if you do not
+actually have files with these names on your system.
+
+@xref{Special Files, ,Special File Names in @code{gawk}}, for a longer description that
+provides the motivation for this feature.
+
+@node Built-in Functions Summary, Time Functions Summary, Special File Summary, Actions Summary
+@appendixsubsec Built-in Functions
+
+@code{awk} provides a number of built-in functions for performing
+numeric operations, string related operations, and I/O related operations.
+
+The built-in arithmetic functions are:
+
+@table @code
+@item atan2(@var{y}, @var{x})
+the arctangent of @var{y/x} in radians.
+
+@item cos(@var{expr})
+the cosine of @var{expr}, which is in radians.
+
+@item exp(@var{expr})
+the exponential function (@code{e ^ @var{expr}}).
+
+@item int(@var{expr})
+truncates to integer.
+
+@item log(@var{expr})
+the natural logarithm of @var{expr}.
+
+@item rand()
+a random number between zero and one.
+
+@item sin(@var{expr})
+the sine of @var{expr}, which is in radians.
+
+@item sqrt(@var{expr})
+the square root function.
+
+@item srand(@r{[}@var{expr}@r{]})
+use @var{expr} as a new seed for the random number generator. If no @var{expr}
+is provided, the time of day is used. The return value is the previous
+seed for the random number generator.
+@end table
+
+@code{awk} has the following built-in string functions:
+
+@table @code
+@item gensub(@var{regex}, @var{subst}, @var{how} @r{[}, @var{target}@r{]})
+If @var{how} is a string beginning with @samp{g} or @samp{G}, then
+replace each match of @var{regex} in @var{target} with @var{subst}.
+Otherwise, replace the @var{how}'th occurrence. If @var{target} is not
+supplied, use @code{$0}. The return value is the changed string; the
+original @var{target} is not modified.
Within @var{subst}, +@samp{\@var{n}}, where @var{n} is a digit from one to nine, can be used to +indicate the text that matched the @var{n}'th parenthesized +subexpression. +This function is @code{gawk}-specific. + +@item gsub(@var{regex}, @var{subst} @r{[}, @var{target}@r{]}) +for each substring matching the regular expression @var{regex} in the string +@var{target}, substitute the string @var{subst}, and return the number of +substitutions. If @var{target} is not supplied, use @code{$0}. + +@item index(@var{str}, @var{search}) +returns the index of the string @var{search} in the string @var{str}, or +zero if +@var{search} is not present. + +@item length(@r{[}@var{str}@r{]}) +returns the length of the string @var{str}. The length of @code{$0} +is returned if no argument is supplied. + +@item match(@var{str}, @var{regex}) +returns the position in @var{str} where the regular expression @var{regex} +occurs, or zero if @var{regex} is not present, and sets the values of +@code{RSTART} and @code{RLENGTH}. + +@item split(@var{str}, @var{arr} @r{[}, @var{regex}@r{]}) +splits the string @var{str} into the array @var{arr} on the regular expression +@var{regex}, and returns the number of elements. If @var{regex} is omitted, +@code{FS} is used instead. @var{regex} can be the null string, causing +each character to be placed into its own array element. +The array @var{arr} is cleared first. + +@item sprintf(@var{fmt}, @var{expr-list}) +prints @var{expr-list} according to @var{fmt}, and returns the resulting string. + +@item sub(@var{regex}, @var{subst} @r{[}, @var{target}@r{]}) +just like @code{gsub}, but only the first matching substring is replaced. + +@item substr(@var{str}, @var{index} @r{[}, @var{len}@r{]}) +returns the @var{len}-character substring of @var{str} starting at @var{index}. +If @var{len} is omitted, the rest of @var{str} is used. 
+ +@item tolower(@var{str}) +returns a copy of the string @var{str}, with all the upper-case characters in +@var{str} translated to their corresponding lower-case counterparts. +Non-alphabetic characters are left unchanged. + +@item toupper(@var{str}) +returns a copy of the string @var{str}, with all the lower-case characters in +@var{str} translated to their corresponding upper-case counterparts. +Non-alphabetic characters are left unchanged. +@end table + +The I/O related functions are: + +@table @code +@item close(@var{expr}) +Close the open file or pipe denoted by @var{expr}. + +@item fflush(@r{[}@var{expr}@r{]}) +Flush any buffered output for the output file or pipe denoted by @var{expr}. +If @var{expr} is omitted, standard output is flushed. +If @var{expr} is the null string (@code{""}), all output buffers are flushed. + +@item system(@var{cmd-line}) +Execute the command @var{cmd-line}, and return the exit status. +If your operating system does not support @code{system}, calling it will +generate a fatal error. + +@samp{system("")} can be used to force @code{awk} to flush any pending +output. This is more portable, but less obvious, than calling @code{fflush}. +@end table + +@node Time Functions Summary, String Constants Summary, Built-in Functions Summary, Actions Summary +@appendixsubsec Time Functions + +The following two functions are available for getting the current +time of day, and for formatting time stamps. +They are specific to @code{gawk}. + +@table @code +@item systime() +returns the current time of day as the number of seconds since a particular +epoch (Midnight, January 1, 1970 UTC, on POSIX systems). + +@item strftime(@r{[}@var{format}@r{[}, @var{timestamp}@r{]]}) +formats @var{timestamp} according to the specification in @var{format}. +The current time of day is used if no @var{timestamp} is supplied. +A default format equivalent to the output of the @code{date} utility is used if +no @var{format} is supplied. 
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for the +details on the conversion specifiers that @code{strftime} accepts. +@end table + +@iftex +@xref{Built-in, ,Built-in Functions}, for a description of all of +@code{awk}'s built-in functions. +@end iftex + +@node String Constants Summary, , Time Functions Summary, Actions Summary +@appendixsubsec String Constants + +String constants in @code{awk} are sequences of characters enclosed +in double quotes (@code{"}). Within strings, certain @dfn{escape sequences} +are recognized, as in C. These are: + +@table @code +@item \\ +A literal backslash. + +@item \a +The ``alert'' character; usually the ASCII BEL character. + +@item \b +Backspace. + +@item \f +Formfeed. + +@item \n +Newline. + +@item \r +Carriage return. + +@item \t +Horizontal tab. + +@item \v +Vertical tab. + +@item \x@var{hex digits} +The character represented by the string of hexadecimal digits following +the @samp{\x}. As in ANSI C, all following hexadecimal digits are +considered part of the escape sequence. E.g., @code{"\x1B"} is a +string containing the ASCII ESC (escape) character. (The @samp{\x} +escape sequence is not in POSIX @code{awk}.) + +@item \@var{ddd} +The character represented by the one, two, or three digit sequence of octal +digits. Thus, @code{"\033"} is also a string containing the ASCII ESC +(escape) character. + +@item \@var{c} +The literal character @var{c}, if @var{c} is not one of the above. +@end table + +The escape sequences may also be used inside constant regular expressions +(e.g., the regexp @code{@w{/[@ \t\f\n\r\v]/}} matches whitespace +characters). + +@xref{Escape Sequences}. 
+ +@node Functions Summary, Historical Features, Actions Summary, Gawk Summary +@appendixsec User-defined Functions + +Functions in @code{awk} are defined as follows: + +@example +function @var{name}(@var{parameter list}) @{ @var{statements} @} +@end example + +Actual parameters supplied in the function call are used to instantiate +the formal parameters declared in the function. Arrays are passed by +reference, other variables are passed by value. + +If there are fewer arguments passed than there are names in @var{parameter-list}, +the extra names are given the null string as their value. Extra names have the +effect of local variables. + +The open-parenthesis in a function call of a user-defined function must +immediately follow the function name, without any intervening white space. +This is to avoid a syntactic ambiguity with the concatenation operator. + +The word @code{func} may be used in place of @code{function} (but not in +POSIX @code{awk}). + +Use the @code{return} statement to return a value from a function. + +@xref{User-defined, ,User-defined Functions}. + +@node Historical Features, , Functions Summary, Gawk Summary +@appendixsec Historical Features + +@cindex historical features +There are two features of historical @code{awk} implementations that +@code{gawk} supports. + +First, it is possible to call the @code{length} built-in function not only +with no arguments, but even without parentheses! + +@example +a = length +@end example + +@noindent +is the same as either of + +@example +a = length() +a = length($0) +@end example + +@noindent +For example: + +@example +$ echo abcdef | awk '@{ print length @}' +@print{} 6 +@end example + +@noindent +This feature is marked as ``deprecated'' in the POSIX standard, and +@code{gawk} will issue a warning about its use if @samp{--lint} is +specified on the command line. +(The ability to use @code{length} this way was actually an accident of the +original Unix @code{awk} implementation. 
If any built-in function used +@code{$0} as its default argument, it was possible to call that function +without the parentheses. In particular, it was common practice to use +the @code{length} function in this fashion, and this usage was documented +in the @code{awk} manual page.) + +The other historical feature is the use of either the @code{break} statement, +or the @code{continue} statement +outside the body of a @code{while}, @code{for}, or @code{do} loop. Traditional +@code{awk} implementations have treated such usage as equivalent to the +@code{next} statement. More recent versions of Unix @code{awk} do not allow +it. @code{gawk} supports this usage if @samp{--traditional} has been +specified. + +@xref{Options, ,Command Line Options}, for more information about the +@samp{--posix} and @samp{--lint} options. + +@node Installation, Notes, Gawk Summary, Top +@appendix Installing @code{gawk} + +This appendix provides instructions for installing @code{gawk} on the +various platforms that are supported by the developers. The primary +developers support Unix (and one day, GNU), while the other ports were +contributed. The file @file{ACKNOWLEDGMENT} in the @code{gawk} +distribution lists the electronic mail addresses of the people who did +the respective ports, and they are also provided in +@ref{Bugs, , Reporting Problems and Bugs}. + +@menu +* Gawk Distribution:: What is in the @code{gawk} distribution. +* Unix Installation:: Installing @code{gawk} under various versions + of Unix. +* VMS Installation:: Installing @code{gawk} on VMS. +* PC Installation:: Installing and Compiling @code{gawk} on MS-DOS + and OS/2 +* Atari Installation:: Installing @code{gawk} on the Atari ST. +* Amiga Installation:: Installing @code{gawk} on an Amiga. +* Bugs:: Reporting Problems and Bugs. +* Other Versions:: Other freely available @code{awk} + implementations. 
+@end menu + +@node Gawk Distribution, Unix Installation, Installation, Installation +@appendixsec The @code{gawk} Distribution + +This section first describes how to get the @code{gawk} +distribution, how to extract it, and then what is in the various files and +subdirectories. + +@menu +* Getting:: How to get the distribution. +* Extracting:: How to extract the distribution. +* Distribution contents:: What is in the distribution. +@end menu + +@node Getting, Extracting, Gawk Distribution, Gawk Distribution +@appendixsubsec Getting the @code{gawk} Distribution +@cindex getting @code{gawk} +@cindex anonymous @code{ftp} +@cindex @code{ftp}, anonymous +@cindex Free Software Foundation +There are three ways you can get GNU software. + +@enumerate +@item +You can copy it from someone else who already has it. + +@cindex Free Software Foundation +@item +You can order @code{gawk} directly from the Free Software Foundation. +Software distributions are available for Unix, MS-DOS, and VMS, on +tape and CD-ROM. The address is: + +@quotation +Free Software Foundation @* +59 Temple Place---Suite 330 @* +Boston, MA 02111-1307 USA @* +Phone: +1-617-542-5942 @* +Fax (including Japan): +1-617-542-2652 @* +E-mail: @code{gnu@@prep.ai.mit.edu} @* +@end quotation + +@noindent +Ordering from the FSF directly contributes to the support of the foundation +and to the production of more free software. + +@item +You can get @code{gawk} by using anonymous @code{ftp} to the Internet host +@code{ftp.gnu.ai.mit.edu}, in the directory @file{/pub/gnu}. + +Here is a list of alternate @code{ftp} sites from which you can obtain GNU +software. When a site is listed as ``@var{site}@code{:}@var{directory}'' the +@var{directory} indicates the directory where GNU software is kept. +You should use a site that is geographically close to you. 
+ +@table @asis +@item Asia: +@table @code +@item cair-archive.kaist.ac.kr:/pub/gnu +@itemx ftp.cs.titech.ac.jp +@itemx ftp.nectec.or.th:/pub/mirrors/gnu +@itemx utsun.s.u-tokyo.ac.jp:/ftpsync/prep +@end table + +@item Australia: +@table @code +@item archie.au:/gnu +(@code{archie.oz} or @code{archie.oz.au} for ACSnet) +@end table + +@item Africa: +@table @code +@item ftp.sun.ac.za:/pub/gnu +@end table + +@item Middle East: +@table @code +@item ftp.technion.ac.il:/pub/unsupported/gnu +@end table + +@item Europe: +@table @code +@item archive.eu.net +@itemx ftp.denet.dk +@itemx ftp.eunet.ch +@itemx ftp.funet.fi:/pub/gnu +@itemx ftp.ieunet.ie:pub/gnu +@itemx ftp.informatik.rwth-aachen.de:/pub/gnu +@itemx ftp.informatik.tu-muenchen.de +@itemx ftp.luth.se:/pub/unix/gnu +@itemx ftp.mcc.ac.uk +@itemx ftp.stacken.kth.se +@itemx ftp.sunet.se:/pub/gnu +@itemx ftp.univ-lyon1.fr:pub/gnu +@itemx ftp.win.tue.nl:/pub/gnu +@itemx irisa.irisa.fr:/pub/gnu +@itemx isy.liu.se +@itemx nic.switch.ch:/mirror/gnu +@itemx src.doc.ic.ac.uk:/gnu +@itemx unix.hensa.ac.uk:/pub/uunet/systems/gnu +@end table + +@item South America: +@table @code +@item ftp.inf.utfsm.cl:/pub/gnu +@itemx ftp.unicamp.br:/pub/gnu +@end table + +@item Western Canada: +@table @code +@item ftp.cs.ubc.ca:/mirror2/gnu +@end table + +@item USA: +@table @code +@item col.hp.com:/mirrors/gnu +@itemx f.ms.uky.edu:/pub3/gnu +@itemx ftp.cc.gatech.edu:/pub/gnu +@itemx ftp.cs.columbia.edu:/archives/gnu/prep +@itemx ftp.digex.net:/pub/gnu +@itemx ftp.hawaii.edu:/mirrors/gnu +@itemx ftp.kpc.com:/pub/mirror/gnu +@end table + +@c NEEDED +@page +@item USA (continued): +@table @code +@itemx ftp.uu.net:/systems/gnu +@itemx gatekeeper.dec.com:/pub/GNU +@itemx jaguar.utah.edu:/gnustuff +@itemx labrea.stanford.edu +@itemx mrcnext.cso.uiuc.edu:/pub/gnu +@itemx vixen.cso.uiuc.edu:/gnu +@itemx wuarchive.wustl.edu:/systems/gnu +@end table +@end table +@end enumerate + +@node Extracting, Distribution contents, Getting, Gawk Distribution 
+@appendixsubsec Extracting the Distribution +@code{gawk} is distributed as a @code{tar} file compressed with the +GNU Zip program, @code{gzip}. + +Once you have the distribution (for example, +@file{gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz}), first use @code{gzip} to expand the +file, and then use @code{tar} to extract it. You can use the following +pipeline to produce the @code{gawk} distribution: + +@example +# Under System V, add 'o' to the tar flags +gzip -d -c gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz | tar -xvpf - +@end example + +@noindent +This will create a directory named @file{gawk-@value{VERSION}.@value{PATCHLEVEL}} in the current +directory. + +The distribution file name is of the form +@file{gawk-@var{V}.@var{R}.@var{n}.tar.gz}. +The @var{V} represents the major version of @code{gawk}, +the @var{R} represents the current release of version @var{V}, and +the @var{n} represents a @dfn{patch level}, meaning that minor bugs have +been fixed in the release. The current patch level is @value{PATCHLEVEL}, +but when +retrieving distributions, you should get the version with the highest +version, release, and patch level. (Note that release levels greater than +or equal to 90 denote ``beta,'' or non-production software; you may not wish +to retrieve such a version unless you don't mind experimenting.) + +If you are not on a Unix system, you will need to make other arrangements +for getting and extracting the @code{gawk} distribution. You should consult +a local expert. + +@node Distribution contents, , Extracting, Gawk Distribution +@appendixsubsec Contents of the @code{gawk} Distribution + +The @code{gawk} distribution has a number of C source files, +documentation files, +subdirectories and files related to the configuration process +(@pxref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}), +and several subdirectories related to different, non-Unix, +operating systems. 
+ +@table @asis +@item various @samp{.c}, @samp{.y}, and @samp{.h} files +These files are the actual @code{gawk} source code. +@end table + +@table @file +@item README +@itemx README_d/README.* +Descriptive files: @file{README} for @code{gawk} under Unix, and the +rest for the various hardware and software combinations. + +@item INSTALL +A file providing an overview of the configuration and installation process. + +@item PORTS +A list of systems to which @code{gawk} has been ported, and which +have successfully run the test suite. + +@item ACKNOWLEDGMENT +A list of the people who contributed major parts of the code or documentation. + +@item ChangeLog +A detailed list of source code changes as bugs are fixed or improvements made. + +@item NEWS +A list of changes to @code{gawk} since the last release or patch. + +@item COPYING +The GNU General Public License. + +@item FUTURES +A brief list of features and/or changes being contemplated for future +releases, with some indication of the time frame for the feature, based +on its difficulty. + +@item LIMITATIONS +A list of those factors that limit @code{gawk}'s performance. +Most of these depend on the hardware or operating system software, and +are not limits in @code{gawk} itself. + +@item POSIX.STD +A description of one area where the POSIX standard for @code{awk} is +incorrect, and how @code{gawk} handles the problem. + +@item PROBLEMS +A file describing known problems with the current release. + +@cindex artificial intelligence, using @code{gawk} +@cindex AI programming, using @code{gawk} +@item doc/awkforai.txt +A short article describing why @code{gawk} is a good language for +AI (Artificial Intelligence) programming. + +@item doc/README.card +@itemx doc/ad.block +@itemx doc/awkcard.in +@itemx doc/cardfonts +@itemx doc/colors +@itemx doc/macros +@itemx doc/no.colors +@itemx doc/setter.outline +The @code{troff} source for a five-color @code{awk} reference card. 
+A modern version of @code{troff}, such as GNU Troff (@code{groff}) is +needed to produce the color version. See the file @file{README.card} +for instructions if you have an older @code{troff}. + +@item doc/gawk.1 +The @code{troff} source for a manual page describing @code{gawk}. +This is distributed for the convenience of Unix users. + +@item doc/gawk.texi +The Texinfo source file for this @value{DOCUMENT}. +It should be processed with @TeX{} to produce a printed document, and +with @code{makeinfo} to produce an Info file. + +@item doc/gawk.info +The generated Info file for this @value{DOCUMENT}. + +@item doc/igawk.1 +The @code{troff} source for a manual page describing the @code{igawk} +program presented in +@ref{Igawk Program, ,An Easy Way to Use Library Functions}. + +@item doc/Makefile.in +The input file used during the configuration process to generate the +actual @file{Makefile} for creating the documentation. + +@item Makefile.in +@itemx acconfig.h +@itemx aclocal.m4 +@itemx configh.in +@itemx configure.in +@itemx configure +@itemx custom.h +@itemx missing/* +These files and subdirectory are used when configuring @code{gawk} +for various Unix systems. They are explained in detail in +@ref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}. + +@item awklib/extract.awk +@itemx awklib/Makefile.in +The @file{awklib} directory contains a copy of @file{extract.awk} +(@pxref{Extract Program, ,Extracting Programs from Texinfo Source Files}), +which can be used to extract the sample programs from the Texinfo +source file for this @value{DOCUMENT}, and a @file{Makefile.in} file, which +@code{configure} uses to generate a @file{Makefile}. +As part of the process of building @code{gawk}, the library functions from +@ref{Library Functions, , A Library of @code{awk} Functions}, +and the @code{igawk} program from +@ref{Igawk Program, , An Easy Way to Use Library Functions}, +are extracted into ready to use files. 
+They are installed as part of the installation process. + +@item atari/* +Files needed for building @code{gawk} on an Atari ST. +@xref{Atari Installation, ,Installing @code{gawk} on the Atari ST}, for details. + +@item pc/* +Files needed for building @code{gawk} under MS-DOS and OS/2. +@xref{PC Installation, ,MS-DOS and OS/2 Installation and Compilation}, for details. + +@item vms/* +Files needed for building @code{gawk} under VMS. +@xref{VMS Installation, ,How to Compile and Install @code{gawk} on VMS}, for details. + +@item test/* +A test suite for +@code{gawk}. You can use @samp{make check} from the top level @code{gawk} +directory to run your version of @code{gawk} against the test suite. +If @code{gawk} successfully passes @samp{make check} then you can +be confident of a successful port. +@end table + +@node Unix Installation, VMS Installation, Gawk Distribution, Installation +@appendixsec Compiling and Installing @code{gawk} on Unix + +Usually, you can compile and install @code{gawk} by typing only two +commands. However, if you do use an unusual system, you may need +to configure @code{gawk} for your system yourself. + +@menu +* Quick Installation:: Compiling @code{gawk} under Unix. +* Configuration Philosophy:: How it's all supposed to work. +@end menu + +@node Quick Installation, Configuration Philosophy, Unix Installation, Unix Installation +@appendixsubsec Compiling @code{gawk} for Unix + +@cindex installation, unix +After you have extracted the @code{gawk} distribution, @code{cd} +to @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}. Like most GNU software, +@code{gawk} is configured +automatically for your Unix system by running the @code{configure} program. +This program is a Bourne shell script that was generated automatically using +GNU @code{autoconf}. +@iftex +(The @code{autoconf} software is +described fully in +@cite{Autoconf---Generating Automatic Configuration Scripts}, +which is available from the Free Software Foundation.) 
+@end iftex +@ifinfo +(The @code{autoconf} software is described fully starting with +@ref{Top, , Introduction, autoconf, Autoconf---Generating Automatic Configuration Scripts}.) +@end ifinfo + +To configure @code{gawk}, simply run @code{configure}: + +@example +sh ./configure +@end example + +This produces a @file{Makefile} and @file{config.h} tailored to your system. +The @file{config.h} file describes various facts about your system. +You may wish to edit the @file{Makefile} to +change the @code{CFLAGS} variable, which controls +the command line options that are passed to the C compiler (such as +optimization levels, or compiling for debugging). + +Alternatively, you can add your own values for most @code{make} +variables, such as @code{CC} and @code{CFLAGS}, on the command line when +running @code{configure}: + +@example +CC=cc CFLAGS=-g sh ./configure +@end example + +@noindent +See the file @file{INSTALL} in the @code{gawk} distribution for +all the details. + +After you have run @code{configure}, and possibly edited the @file{Makefile}, +type: + +@example +make +@end example + +@noindent +and shortly thereafter, you should have an executable version of @code{gawk}. +That's all there is to it! +(If these steps do not work, please send in a bug report; +@pxref{Bugs, ,Reporting Problems and Bugs}.) + +@node Configuration Philosophy, , Quick Installation, Unix Installation +@appendixsubsec The Configuration Process + +@cindex configuring @code{gawk} +(This section is of interest only if you know something about using the +C language and the Unix operating system.) + +The source code for @code{gawk} generally attempts to adhere to formal +standards wherever possible. This means that @code{gawk} uses library +routines that are specified by the ANSI C standard and by the POSIX +operating system interface standard. When using an ANSI C compiler, +function prototypes are used to help improve the compile-time checking. 
+ +Many Unix systems do not support all of either the ANSI or the +POSIX standards. The @file{missing} subdirectory in the @code{gawk} +distribution contains replacement versions of those subroutines that are +most likely to be missing. + +The @file{config.h} file that is created by the @code{configure} program +contains definitions that describe features of the particular operating +system where you are attempting to compile @code{gawk}. The three things +described by this file are what header files are available, so that +they can be correctly included, +what (supposedly) standard functions are actually available in your C +libraries, and +other miscellaneous facts about your +variant of Unix. For example, there may not be an @code{st_blksize} +element in the @code{stat} structure. In this case @samp{HAVE_ST_BLKSIZE} +would be undefined. + +@cindex @code{custom.h} configuration file +It is possible for your C compiler to lie to @code{configure}. It may +do so by not exiting with an error when a library function is not +available. To get around this, you can edit the file @file{custom.h}. +Use an @samp{#ifdef} that is appropriate for your system, and either +@code{#define} any constants that @code{configure} should have defined but +didn't, or @code{#undef} any constants that @code{configure} defined and +should not have. @file{custom.h} is automatically included by +@file{config.h}. + +It is also possible that the @code{configure} program generated by +@code{autoconf} +will not work on your system in some other fashion. If you do have a problem, +the file +@file{configure.in} is the input for @code{autoconf}. You may be able to +change this file, and generate a new version of @code{configure} that will +work on your system. @xref{Bugs, ,Reporting Problems and Bugs}, for +information on how to report problems in configuring @code{gawk}. The same +mechanism may be used to send in updates to @file{configure.in} and/or +@file{custom.h}. 
+ +@node VMS Installation, PC Installation, Unix Installation, Installation +@appendixsec How to Compile and Install @code{gawk} on VMS + +@c based on material from Pat Rankin + +@cindex installation, vms +This section describes how to compile and install @code{gawk} under VMS. + +@menu +* VMS Compilation:: How to compile @code{gawk} under VMS. +* VMS Installation Details:: How to install @code{gawk} under VMS. +* VMS Running:: How to run @code{gawk} under VMS. +* VMS POSIX:: Alternate instructions for VMS POSIX. +@end menu + +@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation +@appendixsubsec Compiling @code{gawk} on VMS + +To compile @code{gawk} under VMS, there is a @code{DCL} command procedure that +will issue all the necessary @code{CC} and @code{LINK} commands, and there is +also a @file{Makefile} for use with the @code{MMS} utility. From the source +directory, use either + +@example +$ @@[.VMS]VMSBUILD.COM +@end example + +@noindent +or + +@example +$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK +@end example + +Depending upon which C compiler you are using, follow one of the sets +of instructions in this table: + +@table @asis +@item VAX C V3.x +Use either @file{vmsbuild.com} or @file{descrip.mms} as is. These use +@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0. + +@item VAX C V2.x +You must have Version 2.3 or 2.4; older ones won't work. Edit either +@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them. +For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters. +Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h}) +and comment out or delete the two lines @samp{#define __STDC__ 0} and +@samp{#define VAXC_BUILTINS} near the end. + +@item GNU C +Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different +from those for VAX C V2.x, but equally straightforward. No changes to +@file{config.h} should be needed. 
+ +@item DEC C +Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments. +No changes to @file{config.h} should be needed. +@end table + +@code{gawk} has been tested under VAX/VMS 5.5-1 using VAX C V3.2, +GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6 and up. + +@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation +@appendixsubsec Installing @code{gawk} on VMS + +To install @code{gawk}, all you need is a ``foreign'' command, which is +a @code{DCL} symbol whose value begins with a dollar sign. For example: + +@example +$ GAWK :== $disk1:[gnubin]GAWK +@end example + +@noindent +(Substitute the actual location of @code{gawk.exe} for +@samp{$disk1:[gnubin]}.) The symbol should be placed in the +@file{login.com} of any user who wishes to run @code{gawk}, +so that it will be defined every time the user logs on. +Alternatively, the symbol may be placed in the system-wide +@file{sylogin.com} procedure, which will allow all users +to run @code{gawk}. + +Optionally, the help entry can be loaded into a VMS help library: + +@example +$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP +@end example + +@noindent +(You may want to substitute a site-specific help library rather than +the standard VMS library @samp{HELPLIB}.) After loading the help text, + +@example +$ HELP GAWK +@end example + +@noindent +will provide information about both the @code{gawk} implementation and the +@code{awk} programming language. + +The logical name @samp{AWK_LIBRARY} can designate a default location +for @code{awk} program files. For the @samp{-f} option, if the specified +filename has no device or directory path information in it, @code{gawk} +will look in the current directory first, then in the directory specified +by the translation of @samp{AWK_LIBRARY} if the file was not found. 
+If after searching in both directories, the file still is not found, +then @code{gawk} appends the suffix @samp{.awk} to the filename and the +file search will be re-tried. If @samp{AWK_LIBRARY} is not defined, that +portion of the file search will fail benignly. + +@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation +@appendixsubsec Running @code{gawk} on VMS + +Command line parsing and quoting conventions are significantly different +on VMS, so examples in this @value{DOCUMENT} or from other sources often need minor +changes. They @emph{are} minor though, and all @code{awk} programs +should run correctly. + +Here are a couple of trivial tests: + +@example +$ gawk -- "BEGIN @{print ""Hello, World!""@}" +$ gawk -"W" version +! could also be -"W version" or "-W version" +@end example + +@noindent +Note that upper-case and mixed-case text must be quoted. + +The VMS port of @code{gawk} includes a @code{DCL}-style interface in addition +to the original shell-style interface (see the help entry for details). +One side-effect of dual command line parsing is that if there is only a +single parameter (as in the quoted string program above), the command +becomes ambiguous. To work around this, the normally optional @samp{--} +flag is required to force Unix style rather than @code{DCL} parsing. If any +other dash-type options (or multiple parameters such as data files to be +processed) are present, there is no ambiguity and @samp{--} can be omitted. + +The default search path when looking for @code{awk} program files specified +by the @samp{-f} option is @code{"SYS$DISK:[],AWK_LIBRARY:"}. The logical +name @samp{AWKPATH} can be used to override this default. The format +of @samp{AWKPATH} is a comma-separated list of directory specifications. +When defining it, the value should be quoted so that it retains a single +translation, and not a multi-translation @code{RMS} searchlist. 
+ +@node VMS POSIX, , VMS Running, VMS Installation +@appendixsubsec Building and Using @code{gawk} on VMS POSIX + +Ignore the instructions above, although @file{vms/gawk.hlp} should still +be made available in a help library. The source tree should be unpacked +into a container file subsystem rather than into the ordinary VMS file +system. Make sure that the two scripts, @file{configure} and +@file{vms/posix-cc.sh}, are executable; use @samp{chmod +x} on them if +necessary. Then execute the following two commands: + +@example +@group +psx> CC=vms/posix-cc.sh configure +psx> make CC=c89 gawk +@end group +@end example + +@noindent +The first command will construct files @file{config.h} and @file{Makefile} out +of templates, using a script to make the C compiler fit @code{configure}'s +expectations. The second command will compile and link @code{gawk} using +the C compiler directly; ignore any warnings from @code{make} about being +unable to redefine @code{CC}. @code{configure} will take a very long +time to execute, but at least it provides incremental feedback as it +runs. + +This has been tested with VAX/VMS V6.2, VMS POSIX V2.0, and DEC C V5.2. + +Once built, @code{gawk} will work like any other shell utility. Unlike +the normal VMS port of @code{gawk}, no special command line manipulation is +needed in the VMS POSIX environment. + +@c Rewritten by Scott Deifik +@c and Darrel Hankerson +@node PC Installation, Atari Installation, VMS Installation, Installation +@appendixsec MS-DOS and OS/2 Installation and Compilation + +@cindex installation, MS-DOS and OS/2 +If you have received a binary distribution prepared by the DOS +maintainers, then @code{gawk} and the necessary support files will appear +under the @file{gnu} directory, with executables in @file{gnu/bin}, +libraries in @file{gnu/lib/awk}, and manual pages under @file{gnu/man}. 
+This is designed for easy installation to a @file{/gnu} directory on your +drive, but the files can be installed anywhere provided @code{AWKPATH} is +set properly. Regardless of the installation directory, the first line of +@file{igawk.cmd} and @file{igawk.bat} (in @file{gnu/bin}) may need to be +edited. + +The binary distribution will contain a separate file describing the +contents. In particular, it may include more than one version of the +@code{gawk} executable. OS/2 binary distributions may have a +different arrangement, but installation is similar. + +The OS/2 and MS-DOS versions of @code{gawk} search for program files as +described in @ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}. +However, semicolons (rather than colons) separate elements +in the @code{AWKPATH} variable. If @code{AWKPATH} is not set or is empty, +then the default search path is @code{@w{".;c:/lib/awk;c:/gnu/lib/awk"}}. + +An @code{sh}-like shell (as opposed to @code{command.com} under MS-DOS +or @code{cmd.exe} under OS/2) may be useful for @code{awk} programming. +Ian Stewartson has written an excellent shell for MS-DOS and OS/2, and a +@code{ksh} clone and GNU Bash are available for OS/2. The file +@file{README_d/README.pc} in the @code{gawk} distribution contains +information on these shells. Users of Stewartson's shell on DOS should +examine its documentation on handling of command-lines. In particular, +the setting for @code{gawk} in the shell configuration may need to be +changed, and the @code{ignoretype} option may also be of interest. + +@code{gawk} can be compiled for MS-DOS and OS/2 using the GNU development tools +from DJ Delorie (DJGPP, MS-DOS-only) or Eberhard Mattes (EMX, MS-DOS and OS/2). +Microsoft C can be used to build 16-bit versions for MS-DOS and OS/2. The file +@file{README_d/README.pc} in the @code{gawk} distribution contains additional +notes, and @file{pc/Makefile} contains important notes on compilation options. 
+ +To build @code{gawk}, copy the files in the @file{pc} directory (@emph{except} +for @file{ChangeLog}) to the +directory with the rest of the @code{gawk} sources. The @file{Makefile} +contains a configuration section with comments, and may need to be +edited in order to work with your @code{make} utility. + +The @file{Makefile} contains a number of targets for building various MS-DOS +and OS/2 versions. A list of targets will be printed if the @code{make} +command is given without a target. As an example, to build @code{gawk} +using the DJGPP tools, enter @samp{make djgpp}. + +Using @code{make} to run the standard tests and to install @code{gawk} +requires additional Unix-like tools, including @code{sh}, @code{sed}, and +@code{cp}. In order to run the tests, the @file{test/*.ok} files may need to +be converted so that they have the usual DOS-style end-of-line markers. Most +of the tests will work properly with Stewartson's shell along with the +companion utilities or appropriate GNU utilities. However, some editing of +@file{test/Makefile} is required. It is recommended that the file +@file{pc/Makefile.tst} be copied to @file{test/Makefile} as a +replacement. Details can be found in @file{README_d/README.pc}. + +@node Atari Installation, Amiga Installation, PC Installation, Installation +@appendixsec Installing @code{gawk} on the Atari ST + +@c based on material from Michal Jaegermann + +@cindex atari +@cindex installation, atari +There are no substantial differences when installing @code{gawk} on +various Atari models. Compiled @code{gawk} executables do not require +a large amount of memory with most @code{awk} programs and should run on all +Motorola processor based models (called further ST, even if that is not +exactly right). + +In order to use @code{gawk}, you need to have a shell, either text or +graphics, that does not map all the characters of a command line to +upper-case. 
Maintaining case distinction in option flags is very +important (@pxref{Options, ,Command Line Options}). +These days this is the default, and it may only be a problem for some +very old machines. If your system does not preserve the case of option +flags, you will need to upgrade your tools. Support for I/O +redirection is necessary to make it easy to import @code{awk} programs +from other environments. Pipes are nice to have, but not vital. + +@menu +* Atari Compiling:: Compiling @code{gawk} on Atari +* Atari Using:: Running @code{gawk} on Atari +@end menu + +@node Atari Compiling, Atari Using, Atari Installation, Atari Installation +@appendixsubsec Compiling @code{gawk} on the Atari ST + +A proper compilation of @code{gawk} sources when @code{sizeof(int)} +differs from @code{sizeof(void *)} requires an ANSI C compiler. An initial +port was done with @code{gcc}. You may actually prefer executables +where @code{int}s are four bytes wide, but the other variant works as well. + +You may need quite a bit of memory when trying to recompile the @code{gawk} +sources, as some source files (@file{regex.c} in particular) are quite +big. If you run out of memory compiling such a file, try reducing the +optimization level for this particular file; this may help. + +@cindex Linux +With a reasonable shell (Bash will do), and in particular if you run +Linux, MiNT or a similar operating system, you have a pretty good +chance that the @code{configure} utility will succeed. Otherwise +sample versions of @file{config.h} and @file{Makefile.st} are given in the +@file{atari} subdirectory and can be edited and copied to the +corresponding files in the main source directory. Even if +@code{configure} produced something, it might be advisable to compare +its results with the sample versions and possibly make adjustments. + +Some @code{gawk} source code fragments depend on a preprocessor define +@samp{atarist}. This basically assumes the TOS environment with @code{gcc}. 
+Modify these sections as appropriate if they are not right for your +environment. Also see the remarks about @code{AWKPATH} and @code{envsep} in +@ref{Atari Using, ,Running @code{gawk} on the Atari ST}. + +As shipped, the sample @file{config.h} claims that the @code{system} +function is missing from the libraries, which is not true, and an +alternative implementation of this function is provided in +@file{atari/system.c}. Depending upon your particular combination of +shell and operating system, you may wish to change the file to indicate +that @code{system} is available. + +@node Atari Using, , Atari Compiling, Atari Installation +@appendixsubsec Running @code{gawk} on the Atari ST + +An executable version of @code{gawk} should be placed, as usual, +anywhere in your @code{PATH} where your shell can find it. + +While executing, @code{gawk} creates a number of temporary files. When +using @code{gcc} libraries for TOS, @code{gawk} looks for either of +the environment variables @code{TEMP} or @code{TMPDIR}, in that order. +If either one is found, its value is assumed to be a directory for +temporary files. This directory must exist, and if you can spare the +memory, it is a good idea to put it on a RAM drive. If neither +@code{TEMP} nor @code{TMPDIR} are found, then @code{gawk} uses the +current directory for its temporary files. + +The ST version of @code{gawk} searches for its program files as described in +@ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}. +The default value for the @code{AWKPATH} variable is taken from +@code{DEFPATH} defined in @file{Makefile}. The sample @code{gcc}/TOS +@file{Makefile} for the ST in the distribution sets @code{DEFPATH} to +@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}. The search path can be +modified by explicitly setting @code{AWKPATH} to whatever you wish. +Note that colons cannot be used on the ST to separate elements in the +@code{AWKPATH} variable, since they have another, reserved, meaning. 
+Instead, you must use a comma to separate elements in the path. When +recompiling, the separating character can be modified by initializing +the @code{envsep} variable in @file{atari/gawkmisc.atr} to another +value. + +Although @code{awk} allows great flexibility in doing I/O redirections +from within a program, this facility should be used with care on the ST +running under TOS. In some circumstances the OS routines for file +handle pool processing lose track of certain events, causing the +computer to crash, and requiring a reboot. Often a warm reboot is +sufficient. Fortunately, this happens infrequently, and in rather +esoteric situations. In particular, avoid having one part of an +@code{awk} program using @code{print} statements explicitly redirected +to @code{"/dev/stdout"}, while other @code{print} statements use the +default standard output, and a calling shell has redirected standard +output to a file. + +When @code{gawk} is compiled with the ST version of @code{gcc} and its +usual libraries, it will accept both @samp{/} and @samp{\} as path separators. +While this is convenient, it should be remembered that this removes one, +technically valid, character (@samp{/}) from your file names, and that +it may create problems for external programs, called via the @code{system} +function, which may not support this convention. Whenever it is possible +that a file created by @code{gawk} will be used by some other program, +use only backslashes. Also remember that in @code{awk}, backslashes in +strings have to be doubled in order to get literal backslashes +(@pxref{Escape Sequences}). + +@node Amiga Installation, Bugs, Atari Installation, Installation +@appendixsec Installing @code{gawk} on an Amiga + +@cindex amiga +@cindex installation, amiga +You can install @code{gawk} on an Amiga system using a Unix emulation +environment available via anonymous @code{ftp} from +@code{ftp.ninemoons.com} in the directory @file{pub/ade/current}. 
+This includes a shell based on @code{pdksh}. The primary component of +this environment is a Unix emulation library, @file{ixemul.lib}. +@c could really use more background here, who wrote this, etc. + +A more complete distribution for the Amiga is available on +the Geek Gadgets CD-ROM from: + +@quotation +CRONUS @* +1840 E. Warner Road #105-265 @* +Tempe, AZ 85284 USA @* +US Toll Free: (800) 804-0833 @* +Phone: +1-602-491-0442 @* +FAX: +1-602-491-0048 @* +Email: @code{info@@ninemoons.com} @* +WWW: @code{http://www.ninemoons.com} @* +Anonymous @code{ftp} site: @code{ftp.ninemoons.com} @* +@end quotation + +Once you have the distribution, you can configure @code{gawk} simply by +running @code{configure}: + +@example +configure -v m68k-amigaos +@end example + +Then run @code{make}, and you should be all set! +(If these steps do not work, please send in a bug report; +@pxref{Bugs, ,Reporting Problems and Bugs}.) + +@node Bugs, Other Versions, Amiga Installation, Installation +@appendixsec Reporting Problems and Bugs +@display +@i{There is nothing more dangerous than a bored archeologist.} +The Hitchhiker's Guide to the Galaxy +@c the radio show, not the book. :-) +@end display +@sp 1 + +If you have problems with @code{gawk} or think that you have found a bug, +please report it to the developers; we cannot promise to do anything +but we might well want to fix it. + +Before reporting a bug, make sure you have actually found a real bug. +Carefully reread the documentation and see if it really says you can do +what you're trying to do. If it's not clear whether you should be able +to do something or not, report that too; it's a bug in the documentation! + +Before reporting a bug or trying to fix it yourself, try to isolate it +to the smallest possible @code{awk} program and input data file that +reproduces the problem. Then send us the program and data file, +some idea of what kind of Unix system you're using, and the exact results +@code{gawk} gave you. 
Also say what you expected to occur; this will help +us decide whether the problem was really in the documentation. + +Once you have a precise problem, there are two e-mail addresses you +can send mail to. + +@table @asis +@item Internet: +@samp{bug-gnu-utils@@prep.ai.mit.edu} + +@item UUCP: +@samp{uunet!prep.ai.mit.edu!bug-gnu-utils} +@end table + +Please include the +version number of @code{gawk} you are using. You can get this information +with the command @samp{gawk --version}. +You should send a carbon copy of your mail to Arnold Robbins, who can +be reached at @samp{arnold@@gnu.ai.mit.edu}. + +@cindex @code{comp.lang.awk} +@strong{Important!} Do @emph{not} try to report bugs in @code{gawk} by +posting to the Usenet/Internet newsgroup @code{comp.lang.awk}. +While the @code{gawk} developers do occasionally read this newsgroup, +there is no guarantee that we will see your posting. The steps described +above are the official, recognized ways for reporting bugs. + +Non-bug suggestions are always welcome as well. If you have questions +about things that are unclear in the documentation or are just obscure +features, ask Arnold Robbins; he will try to help you out, although he +may not have the time to fix the problem. You can send him electronic +mail at the Internet address above. + +If you find bugs in one of the non-Unix ports of @code{gawk}, please send +an electronic mail message to the person who maintains that port. They +are listed below, and also in the @file{README} file in the @code{gawk} +distribution. Information in the @file{README} file should be considered +authoritative if it conflicts with this @value{DOCUMENT}. 
+ +@c NEEDED for looks +@page +The people maintaining the non-Unix ports of @code{gawk} are: + +@cindex Deifik, Scott +@cindex Fish, Fred +@cindex Hankerson, Darrel +@cindex Jaegermann, Michal +@cindex Rankin, Pat +@cindex Rommel, Kai Uwe +@table @asis +@item MS-DOS +Scott Deifik, @samp{scottd@@amgen.com}, and +Darrel Hankerson, @samp{hankedr@@mail.auburn.edu}. + +@item OS/2 +Kai Uwe Rommel, @samp{rommel@@ars.de}. + +@item VMS +Pat Rankin, @samp{rankin@@eql.caltech.edu}. + +@item Atari ST +Michal Jaegermann, @samp{michal@@gortel.phys.ualberta.ca}. + +@item Amiga +Fred Fish, @samp{fnf@@ninemoons.com}. +@end table + +If your bug is also reproducible under Unix, please send copies of your +report to the general GNU bug list, as well as to Arnold Robbins, at the +addresses listed above. + +@node Other Versions, , Bugs, Installation +@appendixsec Other Freely Available @code{awk} Implementations +@cindex Brennan, Michael +@ignore +From: emory!amc.com!brennan (Michael Brennan) +Subject: C++ comments in awk programs +To: arnold@gnu.ai.mit.edu (Arnold Robbins) +Date: Wed, 4 Sep 1996 08:11:48 -0700 (PDT) + +@end ignore +@display +@i{It's kind of fun to put comments like this in your awk code.} + @code{// Do C++ comments work? answer: yes! of course} +Michael Brennan +@end display +@sp 1 + +There are two other freely available @code{awk} implementations. +This section briefly describes where to get them. + +@table @asis +@cindex Kernighan, Brian +@cindex anonymous @code{ftp} +@cindex @code{ftp}, anonymous +@item Unix @code{awk} +Brian Kernighan has been able to make his implementation of +@code{awk} freely available. You can get it via anonymous @code{ftp} +to the host @code{@w{netlib.att.com}}. Change directory to +@file{/netlib/research}. Use ``binary'' or ``image'' mode, and +retrieve @file{awk.bundle.Z}. + +This is a shell archive that has been compressed with the @code{compress} +utility. 
It can be uncompressed with either @code{uncompress} or the +GNU @code{gunzip} utility. + +This version requires an ANSI C compiler; GCC (the GNU C compiler) +works quite nicely. + +@cindex Brennan, Michael +@cindex @code{mawk} +@item @code{mawk} +Michael Brennan has written an independent implementation of @code{awk}, +called @code{mawk}. It is available under the GPL +(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}), +just as @code{gawk} is. + +You can get it via anonymous @code{ftp} to the host +@code{@w{ftp.whidbey.net}}. Change directory to @file{/pub/brennan}. +Use ``binary'' or ``image'' mode, and retrieve @file{mawk1.3.3.tar.gz} +(or the latest version that is there). + +@code{gunzip} may be used to decompress this file. Installation +is similar to @code{gawk}'s +(@pxref{Unix Installation, , Compiling and Installing @code{gawk} on Unix}). +@end table + +@node Notes, Glossary, Installation, Top +@appendix Implementation Notes + +This appendix contains information mainly of interest to implementors and +maintainers of @code{gawk}. Everything in it applies specifically to +@code{gawk}, and not to other implementations. + +@menu +* Compatibility Mode:: How to disable certain @code{gawk} extensions. +* Additions:: Making Additions To @code{gawk}. +* Future Extensions:: New features that may be implemented one day. +* Improvements:: Suggestions for improvements by volunteers. +@end menu + +@node Compatibility Mode, Additions, Notes, Notes +@appendixsec Downward Compatibility and Debugging + +@xref{POSIX/GNU, ,Extensions in @code{gawk} Not in POSIX @code{awk}}, +for a summary of the GNU extensions to the @code{awk} language and program. +All of these features can be turned off by invoking @code{gawk} with the +@samp{--traditional} option, or with the @samp{--posix} option. 
+
+If @code{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
+is one more option available on the command line:
+
+@table @code
+@item -W parsedebug
+@itemx --parsedebug
+Print out the parse stack information as the program is being parsed.
+@end table
+
+This option is intended only for serious @code{gawk} developers,
+and not for the casual user. It probably has not even been compiled into
+your version of @code{gawk}, since it slows down execution.
+
+@node Additions, Future Extensions, Compatibility Mode, Notes
+@appendixsec Making Additions to @code{gawk}
+
+If you should find that you wish to enhance @code{gawk} in a significant
+fashion, you are perfectly free to do so. That is the point of having
+free software; the source code is available, and you are free to change
+it as you wish (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}).
+
+This section discusses the ways you might wish to change @code{gawk},
+and any considerations you should bear in mind.
+
+@menu
+* Adding Code:: Adding code to the main body of @code{gawk}.
+* New Ports:: Porting @code{gawk} to a new operating system.
+@end menu
+
+@node Adding Code, New Ports, Additions, Additions
+@appendixsubsec Adding New Features
+
+@cindex adding new features
+@cindex features, adding
+You are free to add any new features you like to @code{gawk}.
+However, if you want your changes to be incorporated into the @code{gawk}
+distribution, there are several steps that you need to take in order to
+make it possible for me to include your changes.
+
+@enumerate 1
+@item
+Get the latest version.
+It is much easier for me to integrate changes if they are relative to
+the most recent distributed version of @code{gawk}. If your version of
+@code{gawk} is very old, I may not be able to integrate them at all.
+@xref{Getting, ,Getting the @code{gawk} Distribution},
+for information on getting the latest version of @code{gawk}.
+
+@item
+@iftex
+Follow the @cite{GNU Coding Standards}. 
+@end iftex +@ifinfo +See @inforef{Top, , Version, standards, GNU Coding Standards}. +@end ifinfo +This document describes how GNU software should be written. If you haven't +read it, please do so, preferably @emph{before} starting to modify @code{gawk}. +(The @cite{GNU Coding Standards} are available as part of the Autoconf +distribution, from the FSF.) + +@cindex @code{gawk} coding style +@cindex coding style used in @code{gawk} +@item +Use the @code{gawk} coding style. +The C code for @code{gawk} follows the instructions in the +@cite{GNU Coding Standards}, with minor exceptions. The code is formatted +using the traditional ``K&R'' style, particularly as regards the placement +of braces and the use of tabs. In brief, the coding rules for @code{gawk} +are: + +@itemize @bullet +@item +Use old style (non-prototype) function headers when defining functions. + +@item +Put the name of the function at the beginning of its own line. + +@item +Put the return type of the function, even if it is @code{int}, on the +line above the line with the name and arguments of the function. + +@item +The declarations for the function arguments should not be indented. + +@item +Put spaces around parentheses used in control structures +(@code{if}, @code{while}, @code{for}, @code{do}, @code{switch} +and @code{return}). + +@item +Do not put spaces in front of parentheses used in function calls. + +@item +Put spaces around all C operators, and after commas in function calls. + +@item +Do not use the comma operator to produce multiple side-effects, except +in @code{for} loop initialization and increment parts, and in macro bodies. + +@item +Use real tabs for indenting, not spaces. + +@item +Use the ``K&R'' brace layout style. + +@item +Use comparisons against @code{NULL} and @code{'\0'} in the conditions of +@code{if}, @code{while} and @code{for} statements, and in the @code{case}s +of @code{switch} statements, instead of just the +plain pointer or character value. 
+
+@item
+Use the @code{TRUE}, @code{FALSE}, and @code{NULL} symbolic constants,
+and the character constant @code{'\0'} where appropriate, instead of @code{1}
+and @code{0}.
+
+@item
+Provide one-line descriptive comments for each function.
+
+@item
+Do not use @samp{#elif}. Many older Unix C compilers cannot handle it.
+
+@item
+Do not use the @code{alloca} function for allocating memory off the stack.
+Its use causes more portability trouble than the minor benefit of not having
+to free the storage. Instead, use @code{malloc} and @code{free}.
+@end itemize
+
+If I have to reformat your code to follow the coding style used in
+@code{gawk}, I may not bother.
+
+@item
+Be prepared to sign the appropriate paperwork.
+In order for the FSF to distribute your changes, you must either place
+those changes in the public domain, and submit a signed statement to that
+effect, or assign the copyright in your changes to the FSF.
+Both of these actions are easy to do, and @emph{many} people have done so
+already. If you have questions, please contact me
+(@pxref{Bugs, , Reporting Problems and Bugs}),
+or @code{gnu@@prep.ai.mit.edu}.
+
+@item
+Update the documentation.
+Along with your new code, please supply new sections and/or chapters
+for this @value{DOCUMENT}. If at all possible, please use real
+Texinfo, instead of just supplying unformatted ASCII text (although
+even that is better than no documentation at all).
+Conventions to be followed in @cite{@value{TITLE}} are provided
+after the @samp{@@bye} at the end of the Texinfo source file.
+If possible, please update the man page as well.
+
+You will also have to sign paperwork for your documentation changes.
+
+@item
+Submit changes as context diffs or unified diffs.
+Use @samp{diff -c -r -N} or @samp{diff -u -r -N} to compare
+the original @code{gawk} source tree with your version.
+(I find context diffs to be more readable, but unified diffs are
+more compact.)
+I recommend using the GNU version of @code{diff}. 
+Send the output produced by either run of @code{diff} to me when you +submit your changes. +@xref{Bugs, , Reporting Problems and Bugs}, for the electronic mail +information. + +Using this format makes it easy for me to apply your changes to the +master version of the @code{gawk} source code (using @code{patch}). +If I have to apply the changes manually, using a text editor, I may +not do so, particularly if there are lots of changes. +@end enumerate + +Although this sounds like a lot of work, please remember that while you +may write the new code, I have to maintain it and support it, and if it +isn't possible for me to do that with a minimum of extra work, then I +probably will not. + +@node New Ports, , Adding Code, Additions +@appendixsubsec Porting @code{gawk} to a New Operating System + +@cindex porting @code{gawk} +If you wish to port @code{gawk} to a new operating system, there are +several steps to follow. + +@enumerate 1 +@item +Follow the guidelines in +@ref{Adding Code, ,Adding New Features}, +concerning coding style, submission of diffs, and so on. + +@item +When doing a port, bear in mind that your code must co-exist peacefully +with the rest of @code{gawk}, and the other ports. Avoid gratuitous +changes to the system-independent parts of the code. If at all possible, +avoid sprinkling @samp{#ifdef}s just for your port throughout the +code. + +If the changes needed for a particular system affect too much of the +code, I probably will not accept them. In such a case, you will, of course, +be able to distribute your changes on your own, as long as you comply +with the GPL +(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}). + +@item +A number of the files that come with @code{gawk} are maintained by other +people at the Free Software Foundation. Thus, you should not change them +unless it is for a very good reason. I.e.@: changes are not out of the +question, but changes to these files will be scrutinized extra carefully. 
+The files are @file{alloca.c}, @file{getopt.h}, @file{getopt.c}, +@file{getopt1.c}, @file{regex.h}, @file{regex.c}, @file{dfa.h}, +@file{dfa.c}, @file{install-sh}, and @file{mkinstalldirs}. + +@item +Be willing to continue to maintain the port. +Non-Unix operating systems are supported by volunteers who maintain +the code needed to compile and run @code{gawk} on their systems. If no-one +volunteers to maintain a port, that port becomes unsupported, and it may +be necessary to remove it from the distribution. + +@item +Supply an appropriate @file{gawkmisc.???} file. +Each port has its own @file{gawkmisc.???} that implements certain +operating system specific functions. This is cleaner than a plethora of +@samp{#ifdef}s scattered throughout the code. The @file{gawkmisc.c} in +the main source directory includes the appropriate +@file{gawkmisc.???} file from each subdirectory. +Be sure to update it as well. + +Each port's @file{gawkmisc.???} file has a suffix reminiscent of the machine +or operating system for the port. For example, @file{pc/gawkmisc.pc} and +@file{vms/gawkmisc.vms}. The use of separate suffixes, instead of plain +@file{gawkmisc.c}, makes it possible to move files from a port's subdirectory +into the main subdirectory, without accidentally destroying the real +@file{gawkmisc.c} file. (Currently, this is only an issue for the MS-DOS +and OS/2 ports.) + +@item +Supply a @file{Makefile} and any other C source and header files that are +necessary for your operating system. All your code should be in a +separate subdirectory, with a name that is the same as, or reminiscent +of, either your operating system or the computer system. If possible, +try to structure things so that it is not necessary to move files out +of the subdirectory into the main source directory. If that is not +possible, then be sure to avoid using names for your files that +duplicate the names of files in the main source directory. + +@item +Update the documentation. 
+Please write a section (or sections) for this @value{DOCUMENT} describing the +installation and compilation steps needed to install and/or compile +@code{gawk} for your system. + +@item +Be prepared to sign the appropriate paperwork. +In order for the FSF to distribute your code, you must either place +your code in the public domain, and submit a signed statement to that +effect, or assign the copyright in your code to the FSF. +@ifinfo +Both of these actions are easy to do, and @emph{many} people have done so +already. If you have questions, please contact me, or +@code{gnu@@prep.ai.mit.edu}. +@end ifinfo +@end enumerate + +Following these steps will make it much easier to integrate your changes +into @code{gawk}, and have them co-exist happily with the code for other +operating systems that is already there. + +In the code that you supply, and that you maintain, feel free to use a +coding style and brace layout that suits your taste. + +@node Future Extensions, Improvements, Additions, Notes +@appendixsec Probable Future Extensions +@ignore +From emory!scalpel.netlabs.com!lwall Tue Oct 31 12:43:17 1995 +Return-Path: +Message-Id: <9510311732.AA28472@scalpel.netlabs.com> +To: arnold@skeeve.atl.ga.us (Arnold D. Robbins) +Subject: Re: May I quote you? +In-Reply-To: Your message of "Tue, 31 Oct 95 09:11:00 EST." + +Date: Tue, 31 Oct 95 09:32:46 -0800 +From: Larry Wall + +: Greetings. I am working on the release of gawk 3.0. Part of it will be a +: thoroughly updated manual. One of the sections deals with planned future +: extensions and enhancements. I have the following at the beginning +: of it: +: +: @cindex PERL +: @cindex Wall, Larry +: @display +: @i{AWK is a language similar to PERL, only considerably more elegant.} @* +: Arnold Robbins +: @sp 1 +: @i{Hey!} @* +: Larry Wall +: @end display +: +: Before I actually release this for publication, I wanted to get your +: permission to quote you. 
(Hopefully, in the spirit of much of GNU, the +: implied humor is visible... :-) + +I think that would be fine. + +Larry +@end ignore +@cindex PERL +@cindex Wall, Larry +@display +@i{AWK is a language similar to PERL, only considerably more elegant.} +Arnold Robbins + +@i{Hey!} +Larry Wall +@end display +@sp 1 + +This section briefly lists extensions and possible improvements +that indicate the directions we are +currently considering for @code{gawk}. The file @file{FUTURES} in the +@code{gawk} distributions lists these extensions as well. + +This is a list of probable future changes that will be usable by the +@code{awk} language programmer. + +@c these are ordered by likelihood +@table @asis +@item Localization +The GNU project is starting to support multiple languages. +It will at least be possible to make @code{gawk} print its warnings and +error messages in languages other than English. +It may be possible for @code{awk} programs to also use the multiple +language facilities, separate from @code{gawk} itself. + +@item Databases +It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array. + +@item A @code{PROCINFO} Array +The special files that provide process-related information +(@pxref{Special Files, ,Special File Names in @code{gawk}}) +may be superseded by a @code{PROCINFO} array that would provide the same +information, in an easier to access fashion. + +@item More @code{lint} warnings +There are more things that could be checked for portability. + +@item Control of subprocess environment +Changes made in @code{gawk} to the array @code{ENVIRON} may be +propagated to subprocesses run by @code{gawk}. + +@ignore +@item @code{RECLEN} variable for fixed length records +Along with @code{FIELDWIDTHS}, this would speed up the processing of +fixed-length records. + +@item A @code{restart} keyword +After modifying @code{$0}, @code{restart} would restart the pattern +matching loop, without reading a new record from the input. 
+ +@item A @samp{|&} redirection +The @samp{|&} redirection, in place of @samp{|}, would open a two-way +pipeline for communication with a sub-process (via @code{getline} and +@code{print} and @code{printf}). + +@item Function valued variables +It would be possible to assign the name of a user-defined or built-in +function to a regular @code{awk} variable, and then call the function +indirectly, by using the regular variable. This would make it possible +to write general purpose sorting and comparing routines, for example, +by simply passing the name of one function into another. + +@item A built-in @code{stat} function +The @code{stat} function would provide an easy-to-use hook to the +@code{stat} system call so that @code{awk} programs could determine information +about files. + +@item A built-in @code{ftw} function +Combined with function valued variables and the @code{stat} function, +@code{ftw} (file tree walk) would make it easy for an @code{awk} program +to walk an entire file tree. +@end ignore +@end table + +This is a list of probable improvements that will make @code{gawk} +perform better. + +@table @asis +@item An Improved Version of @code{dfa} +The @code{dfa} pattern matcher from GNU @code{grep} has some +problems. Either a new version or a fixed one will deal with some +important regexp matching issues. + +@item Use of GNU @code{malloc} +The GNU version of @code{malloc} could potentially speed up @code{gawk}, +since it relies heavily on the use of dynamic memory allocation. + +@item Use of the @code{rx} regexp library +The @code{rx} regular expression library could potentially speed up +all regexp operations that require knowing the exact location of matches. +This includes record termination, field and array splitting, +and the @code{sub}, @code{gsub}, @code{gensub} and @code{match} functions. 
+@end table + +@node Improvements, , Future Extensions, Notes +@appendixsec Suggestions for Improvements + +Here are some projects that would-be @code{gawk} hackers might like to take +on. They vary in size from a few days to a few weeks of programming, +depending on which one you choose and how fast a programmer you are. Please +send any improvements you write to the maintainers at the GNU project. +@xref{Adding Code, , Adding New Features}, +for guidelines to follow when adding new features to @code{gawk}. +@xref{Bugs, ,Reporting Problems and Bugs}, for information on +contacting the maintainers. + +@enumerate +@item +Compilation of @code{awk} programs: @code{gawk} uses a Bison (YACC-like) +parser to convert the script given it into a syntax tree; the syntax +tree is then executed by a simple recursive evaluator. This method incurs +a lot of overhead, since the recursive evaluator performs many procedure +calls to do even the simplest things. + +It should be possible for @code{gawk} to convert the script's parse tree +into a C program which the user would then compile, using the normal +C compiler and a special @code{gawk} library to provide all the needed +functions (regexps, fields, associative arrays, type coercion, and so +on). + +An easier possibility might be for an intermediate phase of @code{awk} to +convert the parse tree into a linear byte code form like the one used +in GNU Emacs Lisp. The recursive evaluator would then be replaced by +a straight line byte code interpreter that would be intermediate in speed +between running a compiled program and doing what @code{gawk} does +now. + +@item +The programs in the test suite could use documenting in this @value{DOCUMENT}. + +@item +See the @file{FUTURES} file for more ideas. Contact us if you would +seriously like to tackle any of the items listed there. 
+@end enumerate + +@node Glossary, Copying, Notes, Top +@appendix Glossary + +@table @asis +@item Action +A series of @code{awk} statements attached to a rule. If the rule's +pattern matches an input record, @code{awk} executes the +rule's action. Actions are always enclosed in curly braces. +@xref{Action Overview, ,Overview of Actions}. + +@item Amazing @code{awk} Assembler +Henry Spencer at the University of Toronto wrote a retargetable assembler +completely as @code{awk} scripts. It is thousands of lines long, including +machine descriptions for several eight-bit microcomputers. +It is a good example of a +program that would have been better written in another language. + +@item Amazingly Workable Formatter (@code{awf}) +Henry Spencer at the University of Toronto wrote a formatter that accepts +a large subset of the @samp{nroff -ms} and @samp{nroff -man} formatting +commands, using @code{awk} and @code{sh}. + +@item ANSI +The American National Standards Institute. This organization produces +many standards, among them the standards for the C and C++ programming +languages. + +@item Assignment +An @code{awk} expression that changes the value of some @code{awk} +variable or data object. An object that you can assign to is called an +@dfn{lvalue}. The assigned values are called @dfn{rvalues}. +@xref{Assignment Ops, ,Assignment Expressions}. + +@item @code{awk} Language +The language in which @code{awk} programs are written. + +@item @code{awk} Program +An @code{awk} program consists of a series of @dfn{patterns} and +@dfn{actions}, collectively known as @dfn{rules}. For each input record +given to the program, the program's rules are all processed in turn. +@code{awk} programs may also contain function definitions. + +@item @code{awk} Script +Another name for an @code{awk} program. + +@item Bash +The GNU version of the standard shell (the Bourne-Again shell). 
+See ``Bourne Shell.'' + +@item BBS +See ``Bulletin Board System.'' + +@item Boolean Expression +Named after the English mathematician Boole. See ``Logical Expression.'' + +@item Bourne Shell +The standard shell (@file{/bin/sh}) on Unix and Unix-like systems, +originally written by Steven R.@: Bourne. +Many shells (Bash, @code{ksh}, @code{pdksh}, @code{zsh}) are +generally upwardly compatible with the Bourne shell. + +@item Built-in Function +The @code{awk} language provides built-in functions that perform various +numerical, time stamp related, and string computations. Examples are +@code{sqrt} (for the square root of a number) and @code{substr} (for a +substring of a string). @xref{Built-in, ,Built-in Functions}. + +@item Built-in Variable +@code{ARGC}, @code{ARGIND}, @code{ARGV}, @code{CONVFMT}, @code{ENVIRON}, +@code{ERRNO}, @code{FIELDWIDTHS}, @code{FILENAME}, @code{FNR}, @code{FS}, +@code{IGNORECASE}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, @code{ORS}, +@code{RLENGTH}, @code{RSTART}, @code{RS}, @code{RT}, and @code{SUBSEP}, +are the variables that have special meaning to @code{awk}. +Changing some of them affects @code{awk}'s running environment. +Several of these variables are specific to @code{gawk}. +@xref{Built-in Variables}. + +@item Braces +See ``Curly Braces.'' + +@item Bulletin Board System +A computer system allowing users to log in and read and/or leave messages +for other users of the system, much like leaving paper notes on a bulletin +board. + +@item C +The system programming language that most GNU software is written in. The +@code{awk} programming language has C-like syntax, and this @value{DOCUMENT} +points out similarities between @code{awk} and C when appropriate. + +@cindex ISO 8859-1 +@cindex ISO Latin-1 +@item Character Set +The set of numeric codes used by a computer system to represent the +characters (letters, numbers, punctuation, etc.) of a particular country +or place. 
The most common character set in use today is ASCII (American +Standard Code for Information Interchange). Many European +countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1). + +@item CHEM +A preprocessor for @code{pic} that reads descriptions of molecules +and produces @code{pic} input for drawing them. It was written in @code{awk} +by Brian Kernighan and Jon Bentley, and is available from +@code{@w{netlib@@research.att.com}}. + +@item Compound Statement +A series of @code{awk} statements, enclosed in curly braces. Compound +statements may be nested. +@xref{Statements, ,Control Statements in Actions}. + +@item Concatenation +Concatenating two strings means sticking them together, one after another, +giving a new string. For example, the string @samp{foo} concatenated with +the string @samp{bar} gives the string @samp{foobar}. +@xref{Concatenation, ,String Concatenation}. + +@item Conditional Expression +An expression using the @samp{?:} ternary operator, such as +@samp{@var{expr1} ? @var{expr2} : @var{expr3}}. The expression +@var{expr1} is evaluated; if the result is true, the value of the whole +expression is the value of @var{expr2}, otherwise the value is +@var{expr3}. In either case, only one of @var{expr2} and @var{expr3} +is evaluated. @xref{Conditional Exp, ,Conditional Expressions}. + +@item Comparison Expression +A relation that is either true or false, such as @samp{(a < b)}. +Comparison expressions are used in @code{if}, @code{while}, @code{do}, +and @code{for} +statements, and in patterns to select which input records to process. +@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions}. + +@item Curly Braces +The characters @samp{@{} and @samp{@}}. Curly braces are used in +@code{awk} for delimiting actions, compound statements, and function +bodies. + +@item Dark Corner +An area in the language where specifications often were (or still +are) not clear, leading to unexpected or undesirable behavior. 
+Such areas are marked in this @value{DOCUMENT} with ``(d.c.)'' in the
+text, and are indexed under the heading ``dark corner.''
+
+@item Data Objects
+These are numbers and strings of characters. Numbers are converted into
+strings and vice versa, as needed.
+@xref{Conversion, ,Conversion of Strings and Numbers}.
+
+@item Double Precision
+An internal representation of numbers that can have fractional parts.
+Double precision numbers keep track of more digits than do single precision
+numbers, but operations on them are more expensive. This is the way
+@code{awk} stores numeric values. It is the C type @code{double}.
+
+@item Dynamic Regular Expression
+A dynamic regular expression is a regular expression written as an
+ordinary expression. It could be a string constant, such as
+@code{"foo"}, but it may also be an expression whose value can vary.
+@xref{Computed Regexps, , Using Dynamic Regexps}.
+
+@item Environment
+A collection of strings, of the form @var{name@code{=}val}, that each
+program has available to it. Users generally place values into the
+environment in order to provide information to various programs. Typical
+examples are the environment variables @code{HOME} and @code{PATH}.
+
+@item Empty String
+See ``Null String.''
+
+@item Escape Sequences
+A special sequence of characters used for describing non-printing
+characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
+ESC (escape) character. @xref{Escape Sequences}.
+
+@item Field
+When @code{awk} reads an input record, it splits the record into pieces
+separated by whitespace (or by a separator regexp which you can
+change by setting the built-in variable @code{FS}). Such pieces are
+called fields. If the pieces are of fixed length, you can use the built-in
+variable @code{FIELDWIDTHS} to describe their lengths.
+@xref{Field Separators, ,Specifying How Fields are Separated},
+and also see
+@ref{Constant Size, , Reading Fixed-width Data}. 
+ +@item Floating Point Number +Often referred to in mathematical terms as a ``rational'' number, this is +just a number that can have a fractional part. +See ``Double Precision'' and ``Single Precision.'' + +@item Format +Format strings are used to control the appearance of output in the +@code{printf} statement. Also, data conversions from numbers to strings +are controlled by the format string contained in the built-in variable +@code{CONVFMT}. @xref{Control Letters, ,Format-Control Letters}. + +@item Function +A specialized group of statements used to encapsulate general +or program-specific tasks. @code{awk} has a number of built-in +functions, and also allows you to define your own. +@xref{Built-in, ,Built-in Functions}, +and @ref{User-defined, ,User-defined Functions}. + +@item FSF +See ``Free Software Foundation.'' + +@item Free Software Foundation +A non-profit organization dedicated +to the production and distribution of freely distributable software. +It was founded by Richard M.@: Stallman, the author of the original +Emacs editor. GNU Emacs is the most widely used version of Emacs today. + +@item @code{gawk} +The GNU implementation of @code{awk}. + +@item General Public License +This document describes the terms under which @code{gawk} and its source +code may be distributed. (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}) + +@item GNU +``GNU's not Unix''. An on-going project of the Free Software Foundation +to create a complete, freely distributable, POSIX-compliant computing +environment. + +@item GPL +See ``General Public License.'' + +@item Hexadecimal +Base 16 notation, where the digits are @code{0}-@code{9} and +@code{A}-@code{F}, with @samp{A} +representing 10, @samp{B} representing 11, and so on up to @samp{F} for 15. +Hexadecimal numbers are written in C using a leading @samp{0x}, +to indicate their base. Thus, @code{0x12} is 18 (one times 16 plus 2). 
+ +@item I/O +Abbreviation for ``Input/Output,'' the act of moving data into and/or +out of a running program. + +@item Input Record +A single chunk of data read in by @code{awk}. Usually, an @code{awk} input +record consists of one line of text. +@xref{Records, ,How Input is Split into Records}. + +@item Integer +A whole number, i.e.@: a number that does not have a fractional part. + +@item Keyword +In the @code{awk} language, a keyword is a word that has special +meaning. Keywords are reserved and may not be used as variable names. + +@code{gawk}'s keywords are: +@code{BEGIN}, +@code{END}, +@code{if}, +@code{else}, +@code{while}, +@code{do@dots{}while}, +@code{for}, +@code{for@dots{}in}, +@code{break}, +@code{continue}, +@code{delete}, +@code{next}, +@code{nextfile}, +@code{function}, +@code{func}, +and @code{exit}. + +@item Logical Expression +An expression using the operators for logic, AND, OR, and NOT, written +@samp{&&}, @samp{||}, and @samp{!} in @code{awk}. Often called Boolean +expressions, after the mathematician who pioneered this kind of +mathematical logic. + +@item Lvalue +An expression that can appear on the left side of an assignment +operator. In most languages, lvalues can be variables or array +elements. In @code{awk}, a field designator can also be used as an +lvalue. + +@item Null String +A string with no characters in it. It is represented explicitly in +@code{awk} programs by placing two double-quote characters next to +each other (@code{""}). It can appear in input data by having two successive +occurrences of the field separator appear next to each other. + +@item Number +A numeric valued data object. The @code{gawk} implementation uses double +precision floating point to represent numbers. +Very old @code{awk} implementations use single precision floating +point. + +@item Octal +Base-eight notation, where the digits are @code{0}-@code{7}. +Octal numbers are written in C using a leading @samp{0}, +to indicate their base. 
Thus, @code{013} is 11 (one times 8 plus 3). + +@item Pattern +Patterns tell @code{awk} which input records are interesting to which +rules. + +A pattern is an arbitrary conditional expression against which input is +tested. If the condition is satisfied, the pattern is said to @dfn{match} +the input record. A typical pattern might compare the input record against +a regular expression. @xref{Pattern Overview, ,Pattern Elements}. + +@item POSIX +The name for a series of standards being developed by the IEEE +that specify a Portable Operating System interface. The ``IX'' denotes +the Unix heritage of these standards. The main standard of interest for +@code{awk} users is +@cite{IEEE Standard for Information Technology, Standard 1003.2-1992, +Portable Operating System Interface (POSIX) Part 2: Shell and Utilities}. +Informally, this standard is often referred to as simply ``P1003.2.'' + +@item Private +Variables and/or functions that are meant for use exclusively by library +functions, and not for the main @code{awk} program. Special care must be +taken when naming such variables and functions. +@xref{Library Names, , Naming Library Function Global Variables}. + +@item Range (of input lines) +A sequence of consecutive lines from the input file. A pattern +can specify ranges of input lines for @code{awk} to process, or it can +specify single lines. @xref{Pattern Overview, ,Pattern Elements}. + +@item Recursion +When a function calls itself, either directly or indirectly. +If this isn't clear, refer to the entry for ``recursion.'' + +@item Redirection +Redirection means performing input from other than the standard input +stream, or output to other than the standard output stream. + +You can redirect the output of the @code{print} and @code{printf} statements +to a file or a system command, using the @samp{>}, @samp{>>}, and @samp{|} +operators. You can redirect input to the @code{getline} statement using +the @samp{<} and @samp{|} operators. 
+@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}},
+and @ref{Getline, ,Explicit Input with @code{getline}}.
+
+@item Regexp
+Short for @dfn{regular expression}. A regexp is a pattern that denotes a
+set of strings, possibly an infinite set. For example, the regexp
+@samp{R.*xp} matches any string starting with the letter @samp{R}
+and ending with the letters @samp{xp}. In @code{awk}, regexps are
+used in patterns and in conditional expressions. Regexps may contain
+escape sequences. @xref{Regexp, ,Regular Expressions}.
+
+@item Regular Expression
+See ``regexp.''
+
+@item Regular Expression Constant
+A regular expression constant is a regular expression written within
+slashes, such as @code{/foo/}. This regular expression is chosen
+when you write the @code{awk} program, and cannot be changed during
+its execution. @xref{Regexp Usage, ,How to Use Regular Expressions}.
+
+@item Rule
+A segment of an @code{awk} program that specifies how to process single
+input records. A rule consists of a @dfn{pattern} and an @dfn{action}.
+@code{awk} reads an input record; then, for each rule, if the input record
+satisfies the rule's pattern, @code{awk} executes the rule's action.
+Otherwise, the rule does nothing for that input record.
+
+@item Rvalue
+A value that can appear on the right side of an assignment operator.
+In @code{awk}, essentially every expression has a value. These values
+are rvalues.
+
+@item @code{sed}
+See ``Stream Editor.''
+
+@item Short-Circuit
+The nature of the @code{awk} logical operators @samp{&&} and @samp{||}.
+If the value of the entire expression can be deduced from evaluating just
+the left-hand side of these operators, the right-hand side will not
+be evaluated
+(@pxref{Boolean Ops, ,Boolean Expressions}).
+
+@item Side Effect
+A side effect occurs when an expression has an effect aside from merely
+producing a value. Assignment expressions, increment and decrement
+expressions and function calls have side effects. 
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+@item Single Precision
+An internal representation of numbers that can have fractional parts.
+Single precision numbers keep track of fewer digits than do double precision
+numbers, but operations on them are less expensive in terms of CPU time.
+This is the type used by some very old versions of @code{awk} to store
+numeric values. It is the C type @code{float}.
+
+@item Space
+The character generated by hitting the space bar on the keyboard.
+
+@item Special File
+A file name interpreted internally by @code{gawk}, instead of being handed
+directly to the underlying operating system. For example, @file{/dev/stderr}.
+@xref{Special Files, ,Special File Names in @code{gawk}}.
+
+@item Stream Editor
+A program that reads records from an input stream and processes them one
+or more at a time. This is in contrast with batch programs, which may
+expect to read their input files in entirety before starting to do
+anything, and with interactive programs, which require input from the
+user.
+
+@item String
+A datum consisting of a sequence of characters, such as @samp{I am a
+string}. Constant strings are written with double-quotes in the
+@code{awk} language, and may contain escape sequences.
+@xref{Escape Sequences}.
+
+@item Tab
+The character generated by hitting the @kbd{TAB} key on the keyboard.
+It usually expands to up to eight spaces upon output.
+
+@item Unix
+A computer operating system originally developed in the early 1970's at
+AT&T Bell Laboratories. It initially became popular in universities around
+the world, and later moved into commercial environments as a software
+development system and network server system. There are many commercial
+versions of Unix, as well as several work-alike systems whose source code
+is freely available (such as Linux, NetBSD, and FreeBSD).
+
+@item Whitespace
+A sequence of space, tab, or newline characters occurring inside an input
+record or a string. 
+@end table + +@node Copying, Index, Glossary, Top +@unnumbered GNU GENERAL PUBLIC LICENSE +@center Version 2, June 1991 + +@display +Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. +59 Temple Place --- Suite 330, Boston, MA 02111-1307, USA + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +@end display + +@c fakenode --- for prepinfo +@unnumberedsec Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software---to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + +@iftex +@c fakenode --- for prepinfo +@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +@end iftex +@ifinfo +@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +@end ifinfo + +@enumerate 0 +@item +This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The ``Program'', below, +refers to any such program or work, and a ``work based on the Program'' +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term ``modification''.) Each licensee is addressed as ``you''. 
+ +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + +@item +You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + +@item +You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + +@enumerate a +@item +You must cause the modified files to carry prominent notices +stating that you changed the files and the date of any change. + +@item +You must cause any work that you distribute or publish, that in +whole or in part contains or is derived from the Program or any +part thereof, to be licensed as a whole at no charge to all third +parties under the terms of this License. 
+ +@item +If the modified program normally reads commands interactively +when run, you must cause it, when started running for such +interactive use in the most ordinary way, to print or display an +announcement including an appropriate copyright notice and a +notice that there is no warranty (or else, saying that you provide +a warranty) and that users may redistribute the program under +these conditions, and telling the user how to view a copy of this +License. (Exception: if the Program itself is interactive but +does not normally print such an announcement, your work based on +the Program is not required to print an announcement.) +@end enumerate + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. 
+ +@item +You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + +@enumerate a +@item +Accompany it with the complete corresponding machine-readable +source code, which must be distributed under the terms of Sections +1 and 2 above on a medium customarily used for software interchange; or, + +@item +Accompany it with a written offer, valid for at least three +years, to give any third party, for a charge no more than your +cost of physically performing source distribution, a complete +machine-readable copy of the corresponding source code, to be +distributed under the terms of Sections 1 and 2 above on a medium +customarily used for software interchange; or, + +@item +Accompany it with the information you received as to the offer +to distribute corresponding source code. (This alternative is +allowed only for non-commercial distribution and only if you +received the program in object code or executable form with such +an offer, in accord with Subsection b above.) +@end enumerate + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + +@item +You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + +@item +You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +@item +Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. 
+ +@item +If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. 
+ +@item +If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + +@item +The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and ``any +later version'', you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + +@item +If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + +@iftex +@c fakenode --- for prepinfo +@heading NO WARRANTY +@end iftex +@ifinfo +@center NO WARRANTY +@end ifinfo + +@item +BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW@. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE@. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU@. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + +@item +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. +@end enumerate + +@iftex +@c fakenode --- for prepinfo +@heading END OF TERMS AND CONDITIONS +@end iftex +@ifinfo +@center END OF TERMS AND CONDITIONS +@end ifinfo + +@page +@c fakenode --- for prepinfo +@unnumberedsec How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the ``copyright'' line and a pointer to where the full notice is found. 
+ +@smallexample +@var{one line to give the program's name and an idea of what it does.} +Copyright (C) 19@var{yy} @var{name of author} + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE@. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place --- Suite 330, Boston, MA 02111-1307, USA. +@end smallexample + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + +@smallexample +Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} +Gnomovision comes with ABSOLUTELY NO WARRANTY; for details +type `show w'. This is free software, and you are welcome +to redistribute it under certain conditions; type `show c' +for details. +@end smallexample + +The hypothetical commands @samp{show w} and @samp{show c} should show +the appropriate parts of the General Public License. Of course, the +commands you use may be called something other than @samp{show w} and +@samp{show c}; they could even be mouse-clicks or menu items---whatever +suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a ``copyright disclaimer'' for the program, if +necessary. 
Here is a sample; alter the names: + +@smallexample +@group +Yoyodyne, Inc., hereby disclaims all copyright +interest in the program `Gnomovision' +(which makes passes at compilers) written +by James Hacker. + +@var{signature of Ty Coon}, 1 April 1989 +Ty Coon, President of Vice +@end group +@end smallexample + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. + +@node Index, , Copying, Top +@unnumbered Index +@printindex cp + +@summarycontents +@contents +@bye + +Unresolved Issues: +------------------ +1. From ADR. + + Robert J. Chassell points out that awk programs should have some indication + of how to use them. It would be useful to perhaps have a "programming + style" section of the manual that would include this and other tips. + +2. The default AWKPATH search path should be configurable via `configure' + The default and how this changes needs to be documented. + +Consistency issues: + /.../ regexps are in @code, not @samp + ".." strings are in @code, not @samp + no @print before @dots + values of expressions in the text (@code{x} has the value 15), + should be in roman, not @code + Use tab and not TAB + Use ESC and not ESCAPE + Use space and not blank to describe the space bar's character + The term "blank" is thus basically reserved for "blank lines" etc. + The `(d.c.)' should appear inside the closing `.' 
of a sentence + It should come before (pxref{...}) + " " should have an @w{} around it + Use "non-" everywhere + Use @code{ftp} when talking about anonymous ftp + Use upper-case and lower-case, not "upper case" and "lower case" + Use alphanumeric, not alpha-numeric + Use --foo, not -Wfoo when describing long options + Use findex for all programs and functions in the example chapters + Use "Bell Laboratories", but not "Bell Labs". + Use "behavior" instead of "behaviour". + Use "zeros" instead of "zeroes". + Use "Input/Output", not "input/output". Also "I/O", not "i/o". + Use @code{do}, and not @code{do}-@code{while}, except where + actually discussing the do-while. + The words "a", "and", "as", "between", "for", "from", "in", "of", + "on", "that", "the", "to", "with", and "without", + should not be capitalized in @chapter, @section etc. + "Into" and "How" should. + Search for @dfn; make sure important items are also indexed. + "e.g." should always be followed by a comma. + "i.e." should never be followed by a comma, and should be followed + by `@:'. + The numbers zero through ten should be spelled out, except when + talking about file descriptor numbers. > 10 and < 0, it's + ok to use numbers. + In tables, put command line options in @code, while in the text, + put them in @samp. + When using @strong, use "Note:" or "Caution:" with colons and + not exclamation points. Do not surround the paragraphs + with @quotation ... @end quotation. + +Date: Wed, 13 Apr 94 15:20:52 -0400 +From: rsm@gnu.ai.mit.edu (Richard Stallman) +To: gnu-prog@gnu.ai.mit.edu +Subject: A reminder: no pathnames in GNU + +It's a GNU convention to use the term "file name" for the name of a +file, never "pathname". We use the term "path" for search paths, +which are lists of file names. Using it for a single file name as +well is potentially confusing to users. + +So please check any documentation you maintain, if you think you might +have used "pathname". 
+ +Note that "file name" should be two words when it appears as ordinary +text. It's ok as one word when it's a metasyntactic variable, though. + +Suggestions: +------------ +Enhance FIELDWIDTHS with some way to indicate "the rest of the record". +E.g., a length of 0 or -1 or something. May be "n"? + +Make FIELDWIDTHS be an array? + +What if FIELDWIDTHS has invalid values in it? diff --git a/contrib/awk/eval.c b/contrib/awk/eval.c new file mode 100644 index 0000000..aa2e881 --- /dev/null +++ b/contrib/awk/eval.c @@ -0,0 +1,1720 @@ +/* + * eval.c - gawk parse tree interpreter + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" + +#include <math.h> + +extern double pow P((double x, double y)); +extern double modf P((double x, double *yp)); +extern double fmod P((double x, double y)); + +static int eval_condition P((NODE *tree)); +static NODE *op_assign P((NODE *tree)); +static NODE *func_call P((NODE *name, NODE *arg_list)); +static NODE *match_op P((NODE *tree)); +static void push_args P((int count, NODE *arglist, NODE **oldstack, char *func_name)); +static void pop_fcall_stack P((void)); +static void pop_fcall P((void)); +static int in_function P((void)); +char *nodetype2str P((NODETYPE type)); +char *flags2str P((int flagval)); + +#if __GNUC__ < 2 +NODE *_t; /* used as a temporary in macros */ +#endif +#ifdef MSDOS +double _msc51bug; /* to get around a bug in MSC 5.1 */ +#endif +NODE *ret_node; +int OFSlen; +int ORSlen; +int OFMTidx; +int CONVFMTidx; + +/* Macros and variables to save and restore function and loop bindings */ +/* + * the val variable allows return/continue/break-out-of-context to be + * caught and diagnosed + */ +#define PUSH_BINDING(stack, x, val) (memcpy((char *)(stack), (char *)(x), sizeof(jmp_buf)), val++) +#define RESTORE_BINDING(stack, x, val) (memcpy((char *)(x), (char *)(stack), sizeof(jmp_buf)), val--) + +static jmp_buf loop_tag; /* always the current binding */ +static int loop_tag_valid = FALSE; /* nonzero when loop_tag valid */ +static int func_tag_valid = FALSE; +static jmp_buf func_tag; +extern int exiting, exit_val; + +/* + * This table is used by the regexp routines to do case independant + * matching. Basically, every ascii character maps to itself, except + * uppercase letters map to lower case ones. This table has 256 + * entries, for ISO 8859-1. 
Note also that if the system this + * is compiled on doesn't use 7-bit ascii, casetable[] should not be + * defined to the linker, so gawk should not load. + * + * Do NOT make this array static, it is used in several spots, not + * just in this file. + */ +#if 'a' == 97 /* it's ascii */ +char casetable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + /* ' ' '!' '"' '#' '$' '%' '&' ''' */ + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + /* '(' ')' '*' '+' ',' '-' '.' '/' */ + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + /* '0' '1' '2' '3' '4' '5' '6' '7' */ + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + /* '8' '9' ':' ';' '<' '=' '>' '?' */ + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */ + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */ + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */ + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */ + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */ + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + /* 'x' 'y' 'z' '{' '|' '}' '~' */ + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', +#ifndef USE_PURE_ASCII + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + 
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +#else + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +#endif +}; 
+#else +#include "You lose. You will need a translation table for your character set." +#endif + +/* + * This table maps node types to strings for debugging. + * KEEP IN SYNC WITH awk.h!!!! + */ +static char *nodetypes[] = { + "Node_illegal", + "Node_times", + "Node_quotient", + "Node_mod", + "Node_plus", + "Node_minus", + "Node_cond_pair", + "Node_subscript", + "Node_concat", + "Node_exp", + "Node_preincrement", + "Node_predecrement", + "Node_postincrement", + "Node_postdecrement", + "Node_unary_minus", + "Node_field_spec", + "Node_assign", + "Node_assign_times", + "Node_assign_quotient", + "Node_assign_mod", + "Node_assign_plus", + "Node_assign_minus", + "Node_assign_exp", + "Node_and", + "Node_or", + "Node_equal", + "Node_notequal", + "Node_less", + "Node_greater", + "Node_leq", + "Node_geq", + "Node_match", + "Node_nomatch", + "Node_not", + "Node_rule_list", + "Node_rule_node", + "Node_statement_list", + "Node_if_branches", + "Node_expression_list", + "Node_param_list", + "Node_K_if", + "Node_K_while", + "Node_K_for", + "Node_K_arrayfor", + "Node_K_break", + "Node_K_continue", + "Node_K_print", + "Node_K_printf", + "Node_K_next", + "Node_K_exit", + "Node_K_do", + "Node_K_return", + "Node_K_delete", + "Node_K_getline", + "Node_K_function", + "Node_K_nextfile", + "Node_redirect_output", + "Node_redirect_append", + "Node_redirect_pipe", + "Node_redirect_pipein", + "Node_redirect_input", + "Node_var", + "Node_var_array", + "Node_val", + "Node_builtin", + "Node_line_range", + "Node_in_array", + "Node_func", + "Node_func_call", + "Node_cond_exp", + "Node_regex", + "Node_hashnode", + "Node_ahash", + "Node_NF", + "Node_NR", + "Node_FNR", + "Node_FS", + "Node_RS", + "Node_FIELDWIDTHS", + "Node_IGNORECASE", + "Node_OFS", + "Node_ORS", + "Node_OFMT", + "Node_CONVFMT", + "Node_final", + NULL +}; + +char * +nodetype2str(type) +NODETYPE type; +{ + static char buf[40]; + + if (type >= Node_illegal && type <= Node_final) + return nodetypes[(int) type]; + + sprintf(buf, 
"unknown nodetype %d", (int) type); + return buf; +} + +/* flags2str --- make a flags value readable */ + +char * +flags2str(flagval) +int flagval; +{ + static char buffer[BUFSIZ]; + char *sp; + + sp = buffer; + + if (flagval & MALLOC) { + strcpy(sp, "MALLOC"); + sp += strlen(sp); + } + if (flagval & TEMP) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "TEMP"); + sp += strlen(sp); + } + if (flagval & PERM) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "PERM"); + sp += strlen(sp); + } + if (flagval & STRING) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "STRING"); + sp += strlen(sp); + } + if (flagval & STR) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "STR"); + sp += strlen(sp); + } + if (flagval & NUM) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "NUM"); + sp += strlen(sp); + } + if (flagval & NUMBER) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "NUMBER"); + sp += strlen(sp); + } + if (flagval & MAYBE_NUM) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "MAYBE_NUM"); + sp += strlen(sp); + } + if (flagval & ARRAYMAXED) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "ARRAYMAXED"); + sp += strlen(sp); + } + if (flagval & SCALAR) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "SCALAR"); + sp += strlen(sp); + } + if (flagval & FUNC) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "FUNC"); + sp += strlen(sp); + } + if (flagval & FIELD) { + if (sp != buffer) + *sp++ = '|'; + strcpy(sp, "FIELD"); + sp += strlen(sp); + } + + return buffer; +} + +/* + * interpret: + * Tree is a bunch of rules to run. Returns zero if it hit an exit() + * statement + */ +int +interpret(tree) +register NODE *volatile tree; +{ + jmp_buf volatile loop_tag_stack; /* shallow binding stack for loop_tag */ + static jmp_buf rule_tag; /* tag the rule currently being run, for NEXT + * and EXIT statements. 
It is static because + * there are no nested rules */ + register NODE *volatile t = NULL; /* temporary */ + NODE **volatile lhs; /* lhs == Left Hand Side for assigns, etc */ + NODE *volatile stable_tree; + int volatile traverse = TRUE; /* True => loop thru tree (Node_rule_list) */ + + /* avoid false source indications */ + source = NULL; + sourceline = 0; + + if (tree == NULL) + return 1; + sourceline = tree->source_line; + source = tree->source_file; + switch (tree->type) { + case Node_rule_node: + traverse = FALSE; /* False => one for-loop iteration only */ + /* FALL THROUGH */ + case Node_rule_list: + for (t = tree; t != NULL; t = t->rnode) { + if (traverse) + tree = t->lnode; + sourceline = tree->source_line; + source = tree->source_file; + switch (setjmp(rule_tag)) { + case 0: /* normal non-jump */ + /* test pattern, if any */ + if (tree->lnode == NULL || + eval_condition(tree->lnode)) + (void) interpret(tree->rnode); + break; + case TAG_CONTINUE: /* NEXT statement */ + return 1; + case TAG_BREAK: + return 0; + default: + cant_happen(); + } + if (! 
traverse) /* case Node_rule_node */ + break; /* don't loop */ + } + break; + + case Node_statement_list: + for (t = tree; t != NULL; t = t->rnode) + (void) interpret(t->lnode); + break; + + case Node_K_if: + if (eval_condition(tree->lnode)) + (void) interpret(tree->rnode->lnode); + else + (void) interpret(tree->rnode->rnode); + break; + + case Node_K_while: + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + + stable_tree = tree; + while (eval_condition(stable_tree->lnode)) { + switch (setjmp(loop_tag)) { + case 0: /* normal non-jump */ + (void) interpret(stable_tree->rnode); + break; + case TAG_CONTINUE: /* continue statement */ + break; + case TAG_BREAK: /* break statement */ + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + + case Node_K_do: + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + stable_tree = tree; + do { + switch (setjmp(loop_tag)) { + case 0: /* normal non-jump */ + (void) interpret(stable_tree->rnode); + break; + case TAG_CONTINUE: /* continue statement */ + break; + case TAG_BREAK: /* break statement */ + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } while (eval_condition(stable_tree->lnode)); + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + + case Node_K_for: + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + (void) interpret(tree->forloop->init); + stable_tree = tree; + while (eval_condition(stable_tree->forloop->cond)) { + switch (setjmp(loop_tag)) { + case 0: /* normal non-jump */ + (void) interpret(stable_tree->lnode); + /* fall through */ + case TAG_CONTINUE: /* continue statement */ + (void) interpret(stable_tree->forloop->incr); + break; + case TAG_BREAK: /* break statement */ + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } + 
RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + + case Node_K_arrayfor: + { + volatile struct search l; /* For array_for */ + Func_ptr after_assign = NULL; + +#define hakvar forloop->init +#define arrvar forloop->incr + PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + lhs = get_lhs(tree->hakvar, &after_assign); + t = tree->arrvar; + if (t->type == Node_param_list) + t = stack_ptr[t->param_cnt]; + stable_tree = tree; + if ((t->flags & SCALAR) != 0) + fatal("attempt to use scalar as array"); + for (assoc_scan(t, (struct search *)&l); + l.retval; + assoc_next((struct search *)&l)) { + unref(*((NODE **) lhs)); + *lhs = dupnode(l.retval); + if (after_assign) + (*after_assign)(); + switch (setjmp(loop_tag)) { + case 0: + (void) interpret(stable_tree->lnode); + case TAG_CONTINUE: + break; + + case TAG_BREAK: + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + return 1; + default: + cant_happen(); + } + } + RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); + break; + } + + case Node_K_break: + if (! loop_tag_valid) { + /* + * Old AT&T nawk treats break outside of loops like + * next. New ones catch it at parse time. Allow it if + * do_traditional is on, and complain if lint. + */ + static int warned = FALSE; + + if (do_lint && ! warned) { + warning("use of `break' outside a loop is not portable"); + warned = TRUE; + } + if (! do_traditional || do_posix) + fatal("use of `break' outside a loop is not allowed"); + if (in_function()) + pop_fcall_stack(); + longjmp(rule_tag, TAG_CONTINUE); + } else + longjmp(loop_tag, TAG_BREAK); + break; + + case Node_K_continue: + if (! loop_tag_valid) { + /* + * Old AT&T nawk treats continue outside of loops like + * next. New ones catch it at parse time. Allow it if + * do_traditional is on, and complain if lint. + */ + static int warned = FALSE; + + if (do_lint && ! warned) { + warning("use of `continue' outside a loop is not portable"); + warned = TRUE; + } + if (! 
do_traditional || do_posix) + fatal("use of `continue' outside a loop is not allowed"); + if (in_function()) + pop_fcall_stack(); + longjmp(rule_tag, TAG_CONTINUE); + } else + longjmp(loop_tag, TAG_CONTINUE); + break; + + case Node_K_print: + do_print(tree); + break; + + case Node_K_printf: + do_printf(tree); + break; + + case Node_K_delete: + do_delete(tree->lnode, tree->rnode); + break; + + case Node_K_next: + if (in_function()) + pop_fcall_stack(); + longjmp(rule_tag, TAG_CONTINUE); + break; + + case Node_K_nextfile: + if (in_function()) + pop_fcall_stack(); + do_nextfile(); + break; + + case Node_K_exit: + /* + * In A,K,&W, p. 49, it says that an exit statement "... + * causes the program to behave as if the end of input had + * occurred; no more input is read, and the END actions, if + * any are executed." This implies that the rest of the rules + * are not done. So we immediately break out of the main loop. + */ + exiting = TRUE; + if (tree->lnode != NULL) { + t = tree_eval(tree->lnode); + exit_val = (int) force_number(t); + free_temp(t); + } + longjmp(rule_tag, TAG_BREAK); + break; + + case Node_K_return: + t = tree_eval(tree->lnode); + ret_node = dupnode(t); + free_temp(t); + longjmp(func_tag, TAG_RETURN); + break; + + default: + /* + * Appears to be an expression statement. Throw away the + * value. 
+ */ + if (do_lint && tree->type == Node_var) + warning("statement has no effect"); + t = tree_eval(tree); + free_temp(t); + break; + } + return 1; +} + +/* r_tree_eval --- evaluate a subtree */ + +NODE * +r_tree_eval(tree, iscond) +register NODE *tree; +int iscond; +{ + register NODE *r, *t1, *t2; /* return value & temporary subtrees */ + register NODE **lhs; + register int di; + AWKNUM x, x1, x2; + long lx; +#ifdef _CRAY + long lx2; +#endif + char namebuf[100]; + +#ifdef DEBUG + if (tree == NULL) + return Nnull_string; + else if (tree->type == Node_val) { + if (tree->stref <= 0) + cant_happen(); + return tree; + } else if (tree->type == Node_var) { + if (tree->var_value->stref <= 0) + cant_happen(); + return tree->var_value; + } +#endif + + if (tree->type == Node_param_list) { + int paramnum = tree->param_cnt + 1; + + tree = stack_ptr[tree->param_cnt]; + if (tree == NULL) + return Nnull_string; + sprintf(namebuf, "parameter #%d", paramnum); + tree->vname = namebuf; + } + + switch (tree->type) { + case Node_var: + return tree->var_value; + + case Node_and: + return tmp_number((AWKNUM) (eval_condition(tree->lnode) + && eval_condition(tree->rnode))); + + case Node_or: + return tmp_number((AWKNUM) (eval_condition(tree->lnode) + || eval_condition(tree->rnode))); + + case Node_not: + return tmp_number((AWKNUM) ! 
eval_condition(tree->lnode)); + + /* Builtins */ + case Node_builtin: + return (*tree->proc)(tree->subnode); + + case Node_K_getline: + return (do_getline(tree)); + + case Node_in_array: + return tmp_number((AWKNUM) in_array(tree->lnode, tree->rnode)); + + case Node_func_call: + return func_call(tree->rnode, tree->lnode); + + /* unary operations */ + case Node_NR: + case Node_FNR: + case Node_NF: + case Node_FIELDWIDTHS: + case Node_FS: + case Node_RS: + case Node_field_spec: + case Node_subscript: + case Node_IGNORECASE: + case Node_OFS: + case Node_ORS: + case Node_OFMT: + case Node_CONVFMT: + lhs = get_lhs(tree, (Func_ptr *) NULL); + return *lhs; + + case Node_var_array: + fatal("attempt to use array `%s' in a scalar context", + tree->vname); + + case Node_unary_minus: + t1 = tree_eval(tree->subnode); + x = -force_number(t1); + free_temp(t1); + return tmp_number(x); + + case Node_cond_exp: + if (eval_condition(tree->lnode)) + return tree_eval(tree->rnode->lnode); + return tree_eval(tree->rnode->rnode); + + case Node_match: + case Node_nomatch: + case Node_regex: + return match_op(tree); + + case Node_func: + fatal("function `%s' called with space between name and (,\n%s", + tree->lnode->param, + "or used in other expression context"); + + /* assignments */ + case Node_assign: + { + Func_ptr after_assign = NULL; + + if (iscond && do_lint) + warning("assignment used in conditional context"); + r = tree_eval(tree->rnode); + lhs = get_lhs(tree->lnode, &after_assign); + if (r != *lhs) { + NODE *save; + + save = *lhs; + *lhs = dupnode(r); + unref(save); + } + free_temp(r); + tree->lnode->flags |= SCALAR; + if (after_assign) + (*after_assign)(); + return *lhs; + } + + case Node_concat: + { + NODE **treelist; + NODE **strlist; + NODE *save_tree; + register NODE **treep; + register NODE **strp; + register size_t len; + char *str; + register char *dest; + int count; + + /* + * This is an efficiency hack for multiple adjacent string + * concatenations, to avoid recursion 
and string copies. + * + * Node_concat trees grow downward to the left, so + * descend to lowest (first) node, accumulating nodes + * to evaluate to strings as we go. + */ + + /* + * But first, no arbitrary limits. Count the number of + * nodes and malloc the treelist and strlist arrays. + * There will be count + 1 items to concatenate. We + * also leave room for an extra pointer at the end to + * use as a sentinel. Thus, start count at 2. + */ + save_tree = tree; + for (count = 2; tree && tree->type == Node_concat; tree = tree->lnode) + count++; + tree = save_tree; + emalloc(treelist, NODE **, sizeof(NODE *) * count, "tree_eval"); + emalloc(strlist, NODE **, sizeof(NODE *) * count, "tree_eval"); + + /* Now, here we go. */ + treep = treelist; + while (tree && tree->type == Node_concat) { + *treep++ = tree->rnode; + tree = tree->lnode; + } + *treep = tree; + /* + * Now, evaluate to strings in LIFO order, accumulating + * the string length, so we can do a single malloc at the + * end. + */ + strp = strlist; + len = 0; + while (treep >= treelist) { + *strp = force_string(tree_eval(*treep--)); + len += (*strp)->stlen; + strp++; + } + *strp = NULL; + emalloc(str, char *, len+2, "tree_eval"); + str[len] = str[len+1] = '\0'; /* for good measure */ + dest = str; + strp = strlist; + while (*strp) { + memcpy(dest, (*strp)->stptr, (*strp)->stlen); + dest += (*strp)->stlen; + free_temp(*strp); + strp++; + } + r = make_str_node(str, len, ALREADY_MALLOCED); + r->flags |= TEMP; + + free(strlist); + free(treelist); + } + return r; + + /* other assignment types are easier because they are numeric */ + case Node_preincrement: + case Node_predecrement: + case Node_postincrement: + case Node_postdecrement: + case Node_assign_exp: + case Node_assign_times: + case Node_assign_quotient: + case Node_assign_mod: + case Node_assign_plus: + case Node_assign_minus: + return op_assign(tree); + default: + break; /* handled below */ + } + + /* evaluate subtrees in order to do binary operation, 
then keep going */ + t1 = tree_eval(tree->lnode); + t2 = tree_eval(tree->rnode); + + switch (tree->type) { + case Node_geq: + case Node_leq: + case Node_greater: + case Node_less: + case Node_notequal: + case Node_equal: + di = cmp_nodes(t1, t2); + free_temp(t1); + free_temp(t2); + switch (tree->type) { + case Node_equal: + return tmp_number((AWKNUM) (di == 0)); + case Node_notequal: + return tmp_number((AWKNUM) (di != 0)); + case Node_less: + return tmp_number((AWKNUM) (di < 0)); + case Node_greater: + return tmp_number((AWKNUM) (di > 0)); + case Node_leq: + return tmp_number((AWKNUM) (di <= 0)); + case Node_geq: + return tmp_number((AWKNUM) (di >= 0)); + default: + cant_happen(); + } + break; + default: + break; /* handled below */ + } + + x1 = force_number(t1); + free_temp(t1); + x2 = force_number(t2); + free_temp(t2); + switch (tree->type) { + case Node_exp: + if ((lx = x2) == x2 && lx >= 0) { /* integer exponent */ + if (lx == 0) + x = 1; + else if (lx == 1) + x = x1; + else { + /* doing it this way should be more precise */ + for (x = x1; --lx; ) + x *= x1; + } + } else + x = pow((double) x1, (double) x2); + return tmp_number(x); + + case Node_times: + return tmp_number(x1 * x2); + + case Node_quotient: + if (x2 == 0) + fatal("division by zero attempted"); +#ifdef _CRAY + /* special case for integer division, put in for Cray */ + lx2 = x2; + if (lx2 == 0) + return tmp_number(x1 / x2); + lx = (long) x1 / lx2; + if (lx * x2 == x1) + return tmp_number((AWKNUM) lx); + else +#endif + return tmp_number(x1 / x2); + + case Node_mod: + if (x2 == 0) + fatal("division by zero attempted in mod"); +#ifdef HAVE_FMOD + return tmp_number(fmod(x1, x2)); +#else /* ! HAVE_FMOD */ + (void) modf(x1 / x2, &x); + return tmp_number(x1 - x * x2); +#endif /* ! 
HAVE_FMOD */ + + case Node_plus: + return tmp_number(x1 + x2); + + case Node_minus: + return tmp_number(x1 - x2); + + case Node_var_array: + fatal("attempt to use array `%s' in a scalar context", + tree->vname); + + default: + fatal("illegal type (%s) in tree_eval", nodetype2str(tree->type)); + } + return 0; +} + +/* eval_condition --- is TREE true or false? Returns 0==false, non-zero==true */ + +static int +eval_condition(tree) +register NODE *tree; +{ + register NODE *t1; + register int ret; + + if (tree == NULL) /* Null trees are the easiest kinds */ + return TRUE; + if (tree->type == Node_line_range) { + /* + * Node_line_range is kind of like Node_match, EXCEPT: the + * lnode field (more properly, the condpair field) is a node + * of a Node_cond_pair; whether we evaluate the lnode of that + * node or the rnode depends on the triggered word. More + * precisely: if we are not yet triggered, we tree_eval the + * lnode; if that returns true, we set the triggered word. + * If we are triggered (not ELSE IF, note), we tree_eval the + * rnode, clear triggered if it succeeds, and perform our + * action (regardless of success or failure). We want to be + * able to begin and end on a single input record, so this + * isn't an ELSE IF, as noted above. + */ + if (! tree->triggered) + if (! eval_condition(tree->condpair->lnode)) + return FALSE; + else + tree->triggered = TRUE; + /* Else we are triggered */ + if (eval_condition(tree->condpair->rnode)) + tree->triggered = FALSE; + return TRUE; + } + + /* + * Could just be J.random expression. 
in which case, null and 0 are + * false, anything else is true + */ + + t1 = m_tree_eval(tree, TRUE); + if (t1->flags & MAYBE_NUM) + (void) force_number(t1); + if (t1->flags & NUMBER) + ret = (t1->numbr != 0.0); + else + ret = (t1->stlen != 0); + free_temp(t1); + return ret; +} + +/* cmp_nodes --- compare two nodes, returning negative, 0, positive */ + +int +cmp_nodes(t1, t2) +register NODE *t1, *t2; +{ + register int ret; + register size_t len1, len2; + register int l; + int ldiff; + + if (t1 == t2) + return 0; + if (t1->flags & MAYBE_NUM) + (void) force_number(t1); + if (t2->flags & MAYBE_NUM) + (void) force_number(t2); + if ((t1->flags & NUMBER) && (t2->flags & NUMBER)) { + if (t1->numbr == t2->numbr) + return 0; + /* don't subtract, in case one or both are infinite */ + else if (t1->numbr < t2->numbr) + return -1; + else + return 1; + } + (void) force_string(t1); + (void) force_string(t2); + len1 = t1->stlen; + len2 = t2->stlen; + ldiff = len1 - len2; + if (len1 == 0 || len2 == 0) + return ldiff; + l = (ldiff <= 0 ? len1 : len2); + if (IGNORECASE) { + register unsigned char *cp1 = (unsigned char *) t1->stptr; + register unsigned char *cp2 = (unsigned char *) t2->stptr; + + for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++) + ret = casetable[*cp1] - casetable[*cp2]; + } else + ret = memcmp(t1->stptr, t2->stptr, l); + return (ret == 0 ? ldiff : ret); +} + +/* op_assign --- do +=, -=, etc. */ + +static NODE * +op_assign(tree) +register NODE *tree; +{ + AWKNUM rval, lval; + NODE **lhs; + AWKNUM t1, t2; + long ltemp; + NODE *tmp; + Func_ptr after_assign = NULL; + + lhs = get_lhs(tree->lnode, &after_assign); + lval = force_number(*lhs); + + /* + * Can't unref *lhs until we know the type; doing so + * too early breaks x += x sorts of things. + */ + switch(tree->type) { + case Node_preincrement: + case Node_predecrement: + unref(*lhs); + *lhs = make_number(lval + + (tree->type == Node_preincrement ? 
1.0 : -1.0)); + tree->lnode->flags |= SCALAR; + if (after_assign) + (*after_assign)(); + return *lhs; + + case Node_postincrement: + case Node_postdecrement: + unref(*lhs); + *lhs = make_number(lval + + (tree->type == Node_postincrement ? 1.0 : -1.0)); + tree->lnode->flags |= SCALAR; + if (after_assign) + (*after_assign)(); + return tmp_number(lval); + default: + break; /* handled below */ + } + + tmp = tree_eval(tree->rnode); + rval = force_number(tmp); + free_temp(tmp); + + /* + * Do this again; the lhs and the rhs could both be fields. + * Accessing the rhs could cause the lhs to have moved around. + * (Yet another special case. Gack.) + */ + lhs = get_lhs(tree->lnode, &after_assign); + + unref(*lhs); + switch(tree->type) { + case Node_assign_exp: + if ((ltemp = rval) == rval) { /* integer exponent */ + if (ltemp == 0) + *lhs = make_number((AWKNUM) 1); + else if (ltemp == 1) + *lhs = make_number(lval); + else { + /* doing it this way should be more precise */ + for (t1 = t2 = lval; --ltemp; ) + t1 *= t2; + *lhs = make_number(t1); + } + } else + *lhs = make_number((AWKNUM) pow((double) lval, (double) rval)); + break; + + case Node_assign_times: + *lhs = make_number(lval * rval); + break; + + case Node_assign_quotient: + if (rval == (AWKNUM) 0) + fatal("division by zero attempted in /="); +#ifdef _CRAY + /* special case for integer division, put in for Cray */ + ltemp = rval; + if (ltemp == 0) { + *lhs = make_number(lval / rval); + break; + } + ltemp = (long) lval / ltemp; + if (ltemp * lval == rval) + *lhs = make_number((AWKNUM) ltemp); + else +#endif /* _CRAY */ + *lhs = make_number(lval / rval); + break; + + case Node_assign_mod: + if (rval == (AWKNUM) 0) + fatal("division by zero attempted in %%="); +#ifdef HAVE_FMOD + *lhs = make_number(fmod(lval, rval)); +#else /* ! HAVE_FMOD */ + (void) modf(lval / rval, &t1); + t2 = lval - rval * t1; + *lhs = make_number(t2); +#endif /* ! 
HAVE_FMOD */ + break; + + case Node_assign_plus: + *lhs = make_number(lval + rval); + break; + + case Node_assign_minus: + *lhs = make_number(lval - rval); + break; + default: + cant_happen(); + } + tree->lnode->flags |= SCALAR; + if (after_assign) + (*after_assign)(); + return *lhs; +} + +static struct fcall { + char *fname; + unsigned long count; + NODE *arglist; + NODE **prevstack; + NODE **stack; +} *fcall_list = NULL; + +static long fcall_list_size = 0; +static long curfcall = -1; + +/* in_function --- return true/false if we need to unwind awk functions */ + +static int +in_function() +{ + return (curfcall >= 0); +} + +/* pop_fcall --- pop off a single function call */ + +static void +pop_fcall() +{ + NODE *n, **sp, *arg, *argp; + int count; + struct fcall *f; + + assert(curfcall >= 0); + f = & fcall_list[curfcall]; + stack_ptr = f->prevstack; + + /* + * here, we pop each parameter and check whether + * it was an array. If so, and if the arg. passed in was + * a simple variable, then the value should be copied back. + * This achieves "call-by-reference" for arrays. + */ + sp = f->stack; + count = f->count; + + for (argp = f->arglist; count > 0 && argp != NULL; argp = argp->rnode) { + arg = argp->lnode; + if (arg->type == Node_param_list) + arg = stack_ptr[arg->param_cnt]; + n = *sp++; + if ((arg->type == Node_var || arg->type == Node_var_array) + && n->type == Node_var_array) { + /* should we free arg->var_value ? 
*/ + arg->var_array = n->var_array; + arg->type = Node_var_array; + arg->array_size = n->array_size; + arg->table_size = n->table_size; + arg->flags = n->flags; + } + /* n->lnode overlays the array size, don't unref it if array */ + if (n->type != Node_var_array) + unref(n->lnode); + freenode(n); + count--; + } + while (count-- > 0) { + n = *sp++; + /* if n is a local array, all the elements should be freed */ + if (n->type == Node_var_array) + assoc_clear(n); + unref(n->lnode); + freenode(n); + } + if (f->stack) + free((char *) f->stack); + memset(f, '\0', sizeof(struct fcall)); + curfcall--; +} + +/* pop_fcall_stack --- pop off all function args, don't leak memory */ + +static void +pop_fcall_stack() +{ + while (curfcall >= 0) + pop_fcall(); +} + +/* push_args --- push function arguments onto the stack */ + +static void +push_args(count, arglist, oldstack, func_name) +int count; +NODE *arglist; +NODE **oldstack; +char *func_name; +{ + struct fcall *f; + NODE *arg, *argp, *r, **sp, *n; + + if (fcall_list_size == 0) { /* first time */ + emalloc(fcall_list, struct fcall *, 10 * sizeof(struct fcall), + "push_args"); + fcall_list_size = 10; + } + + if (++curfcall >= fcall_list_size) { + fcall_list_size *= 2; + erealloc(fcall_list, struct fcall *, + fcall_list_size * sizeof(struct fcall), "push_args"); + } + f = & fcall_list[curfcall]; + memset(f, '\0', sizeof(struct fcall)); + + if (count > 0) + emalloc(f->stack, NODE **, count*sizeof(NODE *), "func_call"); + f->count = count; + f->fname = func_name; /* not used, for debugging, just in case */ + f->arglist = arglist; + f->prevstack = oldstack; + + sp = f->stack; + + /* for each calling arg. 
add NODE * on stack */ + for (argp = arglist; count > 0 && argp != NULL; argp = argp->rnode) { + arg = argp->lnode; + getnode(r); + r->type = Node_var; + + /* call by reference for arrays; see below also */ + if (arg->type == Node_param_list) + arg = f->prevstack[arg->param_cnt]; + if (arg->type == Node_var_array) + *r = *arg; + else { + n = tree_eval(arg); + r->lnode = dupnode(n); + r->rnode = (NODE *) NULL; + if ((n->flags & SCALAR) != 0) + r->flags |= SCALAR; + free_temp(n); + } + *sp++ = r; + count--; + } + if (argp != NULL) /* left over calling args. */ + warning( + "function `%s' called with more arguments than declared", + func_name); + + /* add remaining params. on stack with null value */ + while (count-- > 0) { + getnode(r); + r->type = Node_var; + r->lnode = Nnull_string; + r->flags &= ~SCALAR; + r->rnode = (NODE *) NULL; + *sp++ = r; + } + + /* + * We have to reassign f. Why, you may ask? It is possible that + * other functions were called during the course of tree_eval()-ing + * the arguments to this function. As a result of that, fcall_list + * may have been realloc()'ed, with the result that f is now + * pointing into free()'d space. This was a nasty one to track down. + */ + f = & fcall_list[curfcall]; + + stack_ptr = f->stack; +} + +/* func_call --- call a function, call by reference for arrays */ + +NODE **stack_ptr; + +static NODE * +func_call(name, arg_list) +NODE *name; /* name is a Node_val giving function name */ +NODE *arg_list; /* Node_expression_list of calling args. 
*/ +{ + register NODE *r; + NODE *f; + jmp_buf volatile func_tag_stack; + jmp_buf volatile loop_tag_stack; + int volatile save_loop_tag_valid = FALSE; + NODE *save_ret_node; + extern NODE *ret_node; + + /* retrieve function definition node */ + f = lookup(name->stptr); + if (f == NULL || f->type != Node_func) + fatal("function `%s' not defined", name->stptr); +#ifdef FUNC_TRACE + fprintf(stderr, "function %s called\n", name->stptr); +#endif + push_args(f->lnode->param_cnt, arg_list, stack_ptr, name->stptr); + + /* + * Execute function body, saving context, as a return statement + * will longjmp back here. + * + * Have to save and restore the loop_tag stuff so that a return + * inside a loop in a function body doesn't scrog any loops going + * on in the main program. We save the necessary info in variables + * local to this function so that function nesting works OK. + * We also only bother to save the loop stuff if we're in a loop + * when the function is called. + */ + if (loop_tag_valid) { + int junk = 0; + + save_loop_tag_valid = (volatile int) loop_tag_valid; + PUSH_BINDING(loop_tag_stack, loop_tag, junk); + loop_tag_valid = FALSE; + } + PUSH_BINDING(func_tag_stack, func_tag, func_tag_valid); + save_ret_node = ret_node; + ret_node = Nnull_string; /* default return value */ + if (setjmp(func_tag) == 0) + (void) interpret(f->rnode); + + r = ret_node; + ret_node = (NODE *) save_ret_node; + RESTORE_BINDING(func_tag_stack, func_tag, func_tag_valid); + pop_fcall(); + + /* Restore the loop_tag stuff if necessary. */ + if (save_loop_tag_valid) { + int junk = 0; + + loop_tag_valid = (int) save_loop_tag_valid; + RESTORE_BINDING(loop_tag_stack, loop_tag, junk); + } + + if ((r->flags & PERM) == 0) + r->flags |= TEMP; + return r; +} + +/* + * r_get_lhs: + * This returns a POINTER to a node pointer. 
get_lhs(ptr) is the current + * value of the var, or where to store the var's new value + */ + +NODE ** +r_get_lhs(ptr, assign) +register NODE *ptr; +Func_ptr *assign; +{ + register NODE **aptr = NULL; + register NODE *n; + + if (assign) + *assign = NULL; /* for safety */ + if (ptr->type == Node_param_list) + ptr = stack_ptr[ptr->param_cnt]; + + switch (ptr->type) { + case Node_var_array: + fatal("attempt to use array `%s' in a scalar context", + ptr->vname); + + case Node_var: + aptr = &(ptr->var_value); +#ifdef DEBUG + if (ptr->var_value->stref <= 0) + cant_happen(); +#endif + break; + + case Node_FIELDWIDTHS: + aptr = &(FIELDWIDTHS_node->var_value); + if (assign != NULL) + *assign = set_FIELDWIDTHS; + break; + + case Node_RS: + aptr = &(RS_node->var_value); + if (assign != NULL) + *assign = set_RS; + break; + + case Node_FS: + aptr = &(FS_node->var_value); + if (assign != NULL) + *assign = set_FS; + break; + + case Node_FNR: + unref(FNR_node->var_value); + FNR_node->var_value = make_number((AWKNUM) FNR); + aptr = &(FNR_node->var_value); + if (assign != NULL) + *assign = set_FNR; + break; + + case Node_NR: + unref(NR_node->var_value); + NR_node->var_value = make_number((AWKNUM) NR); + aptr = &(NR_node->var_value); + if (assign != NULL) + *assign = set_NR; + break; + + case Node_NF: + if (NF == -1) + (void) get_field(HUGE-1, assign); /* parse record */ + unref(NF_node->var_value); + NF_node->var_value = make_number((AWKNUM) NF); + aptr = &(NF_node->var_value); + if (assign != NULL) + *assign = set_NF; + break; + + case Node_IGNORECASE: + aptr = &(IGNORECASE_node->var_value); + if (assign != NULL) + *assign = set_IGNORECASE; + break; + + case Node_OFMT: + aptr = &(OFMT_node->var_value); + if (assign != NULL) + *assign = set_OFMT; + break; + + case Node_CONVFMT: + aptr = &(CONVFMT_node->var_value); + if (assign != NULL) + *assign = set_CONVFMT; + break; + + case Node_ORS: + aptr = &(ORS_node->var_value); + if (assign != NULL) + *assign = set_ORS; + break; + + case 
Node_OFS: + aptr = &(OFS_node->var_value); + if (assign != NULL) + *assign = set_OFS; + break; + + case Node_param_list: + aptr = &(stack_ptr[ptr->param_cnt]->var_value); + break; + + case Node_field_spec: + { + int field_num; + + n = tree_eval(ptr->lnode); + field_num = (int) force_number(n); + free_temp(n); + if (field_num < 0) + fatal("attempt to access field %d", field_num); + if (field_num == 0 && field0_valid) { /* short circuit */ + aptr = &fields_arr[0]; + if (assign != NULL) + *assign = reset_record; + break; + } + aptr = get_field(field_num, assign); + break; + } + case Node_subscript: + n = ptr->lnode; + if (n->type == Node_param_list) { + int i = n->param_cnt + 1; + + n = stack_ptr[n->param_cnt]; + if ((n->flags & SCALAR) != 0) + fatal("attempt to use scalar parameter %d as an array", i); + } else if (n->type == Node_func) { + fatal("attempt to use function `%s' as array", + n->lnode->param); + } + aptr = assoc_lookup(n, concat_exp(ptr->rnode)); + break; + + case Node_func: + fatal("`%s' is a function, assignment is not allowed", + ptr->lnode->param); + + case Node_builtin: + fatal("assignment is not allowed to result of builtin function"); + default: + cant_happen(); + } + return aptr; +} + +/* match_op --- do ~ and !~ */ + +static NODE * +match_op(tree) +register NODE *tree; +{ + register NODE *t1; + register Regexp *rp; + int i; + int match = TRUE; + int kludge_need_start = FALSE; /* FIXME: --- see below */ + + if (tree->type == Node_nomatch) + match = FALSE; + if (tree->type == Node_regex) + t1 = *get_field(0, (Func_ptr *) 0); + else { + t1 = force_string(tree_eval(tree->lnode)); + tree = tree->rnode; + } + rp = re_update(tree); + /* + * FIXME: + * + * Any place where research() is called with a last parameter of + * FALSE, we need to use the avoid_dfa test. This is the only place + * at the moment. 
+ * + * A new or improved dfa that distinguishes beginning/end of + * string from beginning/end of line will allow us to get rid of + * this temporary hack. + * + * The avoid_dfa() function is in re.c; it is not very smart. + */ + if (avoid_dfa(tree, t1->stptr, t1->stlen)) + kludge_need_start = TRUE; + i = research(rp, t1->stptr, 0, t1->stlen, kludge_need_start); + i = (i == -1) ^ (match == TRUE); + free_temp(t1); + return tmp_number((AWKNUM) i); +} + +/* set_IGNORECASE --- update IGNORECASE as appropriate */ + +void +set_IGNORECASE() +{ + static int warned = FALSE; + + if ((do_lint || do_traditional) && ! warned) { + warned = TRUE; + warning("IGNORECASE not supported in compatibility mode"); + } + if (do_traditional) + IGNORECASE = FALSE; + else if ((IGNORECASE_node->var_value->flags & (STRING|STR)) != 0) { + if ((IGNORECASE_node->var_value->flags & MAYBE_NUM) == 0) + IGNORECASE = (force_string(IGNORECASE_node->var_value)->stlen > 0); + else + IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0); + } else if ((IGNORECASE_node->var_value->flags & (NUM|NUMBER)) != 0) + IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0); + else + IGNORECASE = FALSE; /* shouldn't happen */ + set_FS_if_not_FIELDWIDTHS(); +} + +/* set_OFS --- update OFS related variables when OFS assigned to */ + +void +set_OFS() +{ + OFS = force_string(OFS_node->var_value)->stptr; + OFSlen = OFS_node->var_value->stlen; + OFS[OFSlen] = '\0'; +} + +/* set_ORS --- update ORS related variables when ORS assigned to */ + +void +set_ORS() +{ + ORS = force_string(ORS_node->var_value)->stptr; + ORSlen = ORS_node->var_value->stlen; + ORS[ORSlen] = '\0'; +} + +/* fmt_ok --- is the conversion format a valid one? 
*/ + +NODE **fmt_list = NULL; +static int fmt_ok P((NODE *n)); +static int fmt_index P((NODE *n)); + +static int +fmt_ok(n) +NODE *n; +{ + NODE *tmp = force_string(n); + char *p = tmp->stptr; + + if (*p++ != '%') + return 0; + while (*p && strchr(" +-#", *p) != NULL) /* flags */ + p++; + while (*p && isdigit(*p)) /* width - %*.*g is NOT allowed */ + p++; + if (*p == '\0' || (*p != '.' && ! isdigit(*p))) + return 0; + if (*p == '.') + p++; + while (*p && isdigit(*p)) /* precision */ + p++; + if (*p == '\0' || strchr("efgEG", *p) == NULL) + return 0; + if (*++p != '\0') + return 0; + return 1; +} + +/* fmt_index --- track values of OFMT and CONVFMT to keep semantics correct */ + +static int +fmt_index(n) +NODE *n; +{ + register int ix = 0; + static int fmt_num = 4; + static int fmt_hiwater = 0; + + if (fmt_list == NULL) + emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index"); + (void) force_string(n); + while (ix < fmt_hiwater) { + if (cmp_nodes(fmt_list[ix], n) == 0) + return ix; + ix++; + } + /* not found */ + n->stptr[n->stlen] = '\0'; + if (do_lint && ! fmt_ok(n)) + warning("bad %sFMT specification", + n == CONVFMT_node->var_value ? "CONV" + : n == OFMT_node->var_value ? 
"O" + : ""); + + if (fmt_hiwater >= fmt_num) { + fmt_num *= 2; + emalloc(fmt_list, NODE **, fmt_num, "fmt_index"); + } + fmt_list[fmt_hiwater] = dupnode(n); + return fmt_hiwater++; +} + +/* set_OFMT --- track OFMT correctly */ + +void +set_OFMT() +{ + OFMTidx = fmt_index(OFMT_node->var_value); + OFMT = fmt_list[OFMTidx]->stptr; +} + +/* set_CONVFMT --- track CONVFMT correctly */ + +void +set_CONVFMT() +{ + CONVFMTidx = fmt_index(CONVFMT_node->var_value); + CONVFMT = fmt_list[CONVFMTidx]->stptr; +} diff --git a/contrib/awk/field.c b/contrib/awk/field.c new file mode 100644 index 0000000..31c9628 --- /dev/null +++ b/contrib/awk/field.c @@ -0,0 +1,915 @@ +/* + * field.c - routines for dealing with fields and record parsing + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" +#include + +typedef void (* Setfunc) P((long, char *, long, NODE *)); + +static long (*parse_field) P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static void rebuild_record P((void)); +static long re_parse_field P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static long def_parse_field P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static long posix_def_parse_field P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static long null_parse_field P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static long sc_parse_field P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static long fw_parse_field P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static void set_element P((long num, char * str, long len, NODE *arr)); +static void grow_fields_arr P((long num)); +static void set_field P((long num, char *str, long len, NODE *dummy)); + + +static char *parse_extent; /* marks where to restart parse of record */ +static long parse_high_water = 0; /* field number that we have parsed so far */ +static long nf_high_water = 0; /* size of fields_arr */ +static int resave_fs; +static NODE *save_FS; /* save current value of FS when line is read, + * to be used in deferred parsing + */ +static int *FIELDWIDTHS = NULL; + +NODE **fields_arr; /* array of pointers to the field nodes */ +int field0_valid; /* $(>0) has not been changed yet */ +int default_FS; /* TRUE when FS == " " */ +Regexp *FS_regexp = NULL; +static NODE *Null_field = NULL; + +/* init_fields --- set up the fields array to start with */ + +void +init_fields() +{ + NODE *n; + + emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields"); + getnode(n); + *n = *Nnull_string; + n->flags 
|= (SCALAR|FIELD); + n->flags &= ~PERM; + fields_arr[0] = n; + parse_extent = fields_arr[0]->stptr; + save_FS = dupnode(FS_node->var_value); + getnode(Null_field); + *Null_field = *Nnull_string; + Null_field->flags |= (SCALAR|FIELD); + Null_field->flags &= ~(NUM|NUMBER|MAYBE_NUM|PERM); + field0_valid = TRUE; +} + +/* grow_fields --- acquire new fields as needed */ + +static void +grow_fields_arr(num) +long num; +{ + register int t; + register NODE *n; + + erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr"); + for (t = nf_high_water + 1; t <= num; t++) { + getnode(n); + *n = *Null_field; + fields_arr[t] = n; + } + nf_high_water = num; +} + +/* set_field --- set the value of a particular field */ + +/*ARGSUSED*/ +static void +set_field(num, str, len, dummy) +long num; +char *str; +long len; +NODE *dummy; /* not used -- just to make interface same as set_element */ +{ + register NODE *n; + + if (num > nf_high_water) + grow_fields_arr(num); + n = fields_arr[num]; + n->stptr = str; + n->stlen = len; + n->flags = (STR|STRING|MAYBE_NUM|SCALAR|FIELD); +} + +/* rebuild_record --- Someone assigned a value to $(something). + Fix up $0 to be right */ + +static void +rebuild_record() +{ + /* + * use explicit unsigned longs for lengths, in case + * a size_t isn't big enough. 
+ */ + register unsigned long tlen; + register unsigned long ofslen; + register NODE *tmp; + NODE *ofs; + char *ops; + register char *cops; + long i; + char *f0start, *f0end; + + assert(NF != -1); + + tlen = 0; + ofs = force_string(OFS_node->var_value); + ofslen = ofs->stlen; + for (i = NF; i > 0; i--) { + tmp = fields_arr[i]; + tmp = force_string(tmp); + tlen += tmp->stlen; + } + tlen += (NF - 1) * ofslen; + if ((long) tlen < 0) + tlen = 0; + emalloc(ops, char *, tlen + 2, "rebuild_record"); + cops = ops; + ops[0] = '\0'; + for (i = 1; i <= NF; i++) { + tmp = fields_arr[i]; + /* copy field */ + if (tmp->stlen == 1) + *cops++ = tmp->stptr[0]; + else if (tmp->stlen != 0) { + memcpy(cops, tmp->stptr, tmp->stlen); + cops += tmp->stlen; + } + /* copy OFS */ + if (i != NF) { + if (ofslen == 1) + *cops++ = ofs->stptr[0]; + else if (ofslen != 0) { + memcpy(cops, ofs->stptr, ofslen); + cops += ofslen; + } + } + } + tmp = make_str_node(ops, tlen, ALREADY_MALLOCED); + + /* + * Since we are about to unref fields_arr[0], we want to find + * any fields that still point into it, and have them point + * into the new field zero. + */ + f0start = fields_arr[0]->stptr; + f0end = fields_arr[0]->stptr + fields_arr[0]->stlen; + for (cops = ops, i = 1; i <= NF; i++) { + char *field_data = fields_arr[i]->stptr; + + if (fields_arr[i]->stlen > 0 + && f0start <= field_data && field_data < f0end) + fields_arr[i]->stptr = cops; + + cops += fields_arr[i]->stlen + ofslen; + } + + unref(fields_arr[0]); + + fields_arr[0] = tmp; + field0_valid = TRUE; +} + +/* + * set_record: + * setup $0, but defer parsing rest of line until reference is made to $(>0) + * or to NF. At that point, parse only as much as necessary. + */ +void +set_record(buf, cnt, freeold) +char *buf; /* ignored if ! freeold */ +int cnt; /* ignored if ! 
freeold */ +int freeold; +{ + register int i; + NODE *n; + + NF = -1; + for (i = 1; i <= parse_high_water; i++) { + unref(fields_arr[i]); + getnode(n); + *n = *Null_field; + fields_arr[i] = n; + } + + parse_high_water = 0; + /* + * $0 = $0 should resplit using the current value of FS, thus, + * this is executed orthogonally to the value of freeold. + */ + if (resave_fs) { + resave_fs = FALSE; + unref(save_FS); + save_FS = dupnode(FS_node->var_value); + } + if (freeold) { + unref(fields_arr[0]); + getnode(n); + n->stptr = buf; + n->stlen = cnt; + n->stref = 1; + n->type = Node_val; + n->stfmt = -1; + n->flags = (STRING|STR|MAYBE_NUM|SCALAR|FIELD); + fields_arr[0] = n; + } + fields_arr[0]->flags |= MAYBE_NUM; + field0_valid = TRUE; +} + +/* reset_record --- start over again with current $0 */ + +void +reset_record() +{ + (void) force_string(fields_arr[0]); + set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, FALSE); +} + +/* set_NF --- handle what happens to $0 and fields when NF is changed */ + +void +set_NF() +{ + register int i; + NODE *n; + + assert(NF != -1); + + NF = (long) force_number(NF_node->var_value); + if (NF > nf_high_water) + grow_fields_arr(NF); + if (parse_high_water < NF) { + for (i = parse_high_water + 1; i <= NF; i++) { + unref(fields_arr[i]); + getnode(n); + *n = *Null_field; + fields_arr[i] = n; + } + } else if (parse_high_water > 0) { + for (i = NF + 1; i <= parse_high_water; i++) { + unref(fields_arr[i]); + getnode(n); + *n = *Null_field; + fields_arr[i] = n; + } + parse_high_water = NF; + } + field0_valid = FALSE; +} + +/* + * re_parse_field --- parse fields using a regexp. + * + * This is called both from get_field() and from do_split() + * via (*parse_field)(). 
This variation is for when FS is a regular + * expression -- either user-defined or because RS=="" and FS==" " + */ +static long +re_parse_field(up_to, buf, len, fs, rp, set, n) +long up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +Setfunc set; /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register long nf = parse_high_water; + register char *field; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + if (RS_is_null && default_FS) + while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n')) + scan++; + field = scan; + while (scan < end + && research(rp, scan, 0, (end - scan), TRUE) != -1 + && nf < up_to) { + if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */ + scan++; + if (scan == end) { + (*set)(++nf, field, (long)(scan - field), n); + up_to = nf; + break; + } + continue; + } + (*set)(++nf, field, + (long)(scan + RESTART(rp, scan) - field), n); + scan += REEND(rp, scan); + field = scan; + if (scan == end) /* FS at end of record */ + (*set)(++nf, field, 0L, n); + } + if (nf != up_to && scan < end) { + (*set)(++nf, scan, (long)(end - scan), n); + scan = end; + } + *buf = scan; + return (nf); +} + +/* + * def_parse_field --- default field parsing. + * + * This is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is a single space + * character. 
+ */ + +static long +def_parse_field(up_to, buf, len, fs, rp, set, n) +long up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +Setfunc set; /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register long nf = parse_high_water; + register char *field; + register char *end = scan + len; + char sav; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + /* + * Nasty special case. If FS set to "", return whole record + * as first field. This is not worth a separate function. + */ + if (fs->stlen == 0) { + (*set)(++nf, *buf, len, n); + *buf += len; + return nf; + } + + /* before doing anything save the char at *end */ + sav = *end; + /* because it will be destroyed now: */ + + *end = ' '; /* sentinel character */ + for (; nf < up_to; scan++) { + /* + * special case: fs is single space, strip leading whitespace + */ + while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n')) + scan++; + if (scan >= end) + break; + field = scan; + while (*scan != ' ' && *scan != '\t' && *scan != '\n') + scan++; + (*set)(++nf, field, (long)(scan - field), n); + if (scan == end) + break; + } + + /* everything done, restore original char at *end */ + *end = sav; + + *buf = scan; + return nf; +} + +/* + * posix_def_parse_field --- default field parsing. + * + * This is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is a single space + * character. The only difference between this and def_parse_field() + * is that this one does not allow newlines to separate fields. 
+ */ + +static long +posix_def_parse_field(up_to, buf, len, fs, rp, set, n) +long up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +Setfunc set; /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register long nf = parse_high_water; + register char *field; + register char *end = scan + len; + char sav; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + /* + * Nasty special case. If FS set to "", return whole record + * as first field. This is not worth a separate function. + */ + if (fs->stlen == 0) { + (*set)(++nf, *buf, len, n); + *buf += len; + return nf; + } + + /* before doing anything save the char at *end */ + sav = *end; + /* because it will be destroyed now: */ + + *end = ' '; /* sentinel character */ + for (; nf < up_to; scan++) { + /* + * special case: fs is single space, strip leading whitespace + */ + while (scan < end && (*scan == ' ' || *scan == '\t')) + scan++; + if (scan >= end) + break; + field = scan; + while (*scan != ' ' && *scan != '\t') + scan++; + (*set)(++nf, field, (long)(scan - field), n); + if (scan == end) + break; + } + + /* everything done, restore original char at *end */ + *end = sav; + + *buf = scan; + return nf; +} + +/* + * null_parse_field --- each character is a separate field + * + * This is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is the null string. 
+ */ +static long +null_parse_field(up_to, buf, len, fs, rp, set, n) +long up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +Setfunc set; /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register long nf = parse_high_water; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + for (; nf < up_to && scan < end; scan++) + (*set)(++nf, scan, 1L, n); + + *buf = scan; + return nf; +} + +/* + * sc_parse_field --- single character field separator + * + * This is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is a single character + * other than space. + */ +static long +sc_parse_field(up_to, buf, len, fs, rp, set, n) +long up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +Setfunc set; /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register char fschar; + register long nf = parse_high_water; + register char *field; + register char *end = scan + len; + int onecase; + char sav; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + if (RS_is_null && fs->stlen == 0) + fschar = '\n'; + else + fschar = fs->stptr[0]; + + onecase = (IGNORECASE && isalpha(fschar)); + if (onecase) + fschar = casetable[(int) fschar]; + + /* before doing anything save the char at *end */ + sav = *end; + /* because it will be destroyed now: */ + *end = fschar; /* sentinel character */ + + for (; nf < up_to;) { + field = scan; + if (onecase) { + while (casetable[(int) *scan] != fschar) + scan++; + } else { + while (*scan != fschar) + scan++; + } + (*set)(++nf, field, (long)(scan - field), n); + if (scan == end) + break; + scan++; + if (scan == end) { /* FS at end of record */ + 
(*set)(++nf, field, 0L, n); + break; + } + } + + /* everything done, restore original char at *end */ + *end = sav; + + *buf = scan; + return nf; +} + +/* + * fw_parse_field --- field parsing using FIELDWIDTHS spec + * + * This is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for fields are fixed widths. + */ +static long +fw_parse_field(up_to, buf, len, fs, rp, set, n) +long up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +Setfunc set; /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register long nf = parse_high_water; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { + if (len > end - scan) + len = end - scan; + (*set)(++nf, scan, (long) len, n); + scan += len; + } + if (len == -1) + *buf = end; + else + *buf = scan; + return nf; +} + +/* get_field --- return a particular $n */ + +NODE ** +get_field(requested, assign) +register long requested; +Func_ptr *assign; /* this field is on the LHS of an assign */ +{ + /* + * if requesting whole line but some other field has been altered, + * then the whole line must be rebuilt + */ + if (requested == 0) { + if (! 
field0_valid) { + /* first, parse remainder of input record */ + if (NF == -1) { + NF = (*parse_field)(HUGE-1, &parse_extent, + fields_arr[0]->stlen - + (parse_extent - fields_arr[0]->stptr), + save_FS, FS_regexp, set_field, + (NODE *) NULL); + parse_high_water = NF; + } + rebuild_record(); + } + if (assign != NULL) + *assign = reset_record; + return &fields_arr[0]; + } + + /* assert(requested > 0); */ + + if (assign != NULL) + field0_valid = FALSE; /* $0 needs reconstruction */ + + if (requested <= parse_high_water) /* already parsed this field */ + return &fields_arr[requested]; + + if (NF == -1) { /* have not yet parsed to end of record */ + /* + * parse up to requested fields, calling set_field() for each, + * saving in parse_extent the point where the parse left off + */ + if (parse_high_water == 0) /* starting at the beginning */ + parse_extent = fields_arr[0]->stptr; + parse_high_water = (*parse_field)(requested, &parse_extent, + fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr), + save_FS, FS_regexp, set_field, (NODE *) NULL); + + /* + * if we reached the end of the record, set NF to the number of + * fields so far. 
Note that requested might actually refer to + * a field that is beyond the end of the record, but we won't + * set NF to that value at this point, since this is only a + * reference to the field and NF only gets set if the field + * is assigned to -- this case is handled below + */ + if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen) + NF = parse_high_water; + if (requested == HUGE-1) /* HUGE-1 means set NF */ + requested = parse_high_water; + } + if (parse_high_water < requested) { /* requested beyond end of record */ + if (assign != NULL) { /* expand record */ + if (requested > nf_high_water) + grow_fields_arr(requested); + + NF = requested; + parse_high_water = requested; + } else + return &Null_field; + } + + return &fields_arr[requested]; +} + +/* set_element --- set an array element, used by do_split() */ + +static void +set_element(num, s, len, n) +long num; +char *s; +long len; +NODE *n; +{ + register NODE *it; + + it = make_string(s, len); + it->flags |= MAYBE_NUM; + *assoc_lookup(n, tmp_number((AWKNUM) (num))) = it; +} + +/* do_split --- implement split(), semantics are same as for field splitting */ + +NODE * +do_split(tree) +NODE *tree; +{ + NODE *src, *arr, *sep, *tmp; + NODE *fs; + char *s; + long (*parseit) P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); + Regexp *rp = NULL; + + /* + * do dupnode(), to avoid problems like + * x = split(a[1], a, "blah") + * since we assoc_clear the array. gack. + * this also gives us complete call by value semantics. 
+ */ + tmp = tree_eval(tree->lnode); + src = dupnode(tmp); + free_temp(tmp); + + arr = tree->rnode->lnode; + if (tree->rnode->rnode != NULL) + sep = tree->rnode->rnode->lnode; /* 3rd arg */ + else + sep = NULL; + + (void) force_string(src); + + if (arr->type == Node_param_list) + arr = stack_ptr[arr->param_cnt]; + if (arr->type != Node_var && arr->type != Node_var_array) + fatal("second argument of split is not an array"); + arr->type = Node_var_array; + assoc_clear(arr); + + if (sep->re_flags & FS_DFLT) { + parseit = parse_field; + fs = force_string(FS_node->var_value); + rp = FS_regexp; + } else { + tmp = force_string(tree_eval(sep->re_exp)); + if (tmp->stlen == 0) + parseit = null_parse_field; + else if (tmp->stlen == 1 && (sep->re_flags & CONST) == 0) { + if (tmp->stptr[0] == ' ') { + if (do_posix) + parseit = posix_def_parse_field; + else + parseit = def_parse_field; + } else + parseit = sc_parse_field; + } else { + parseit = re_parse_field; + rp = re_update(sep); + } + fs = tmp; + } + + s = src->stptr; + tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int) src->stlen, + fs, rp, set_element, arr)); + unref(src); + free_temp(sep); + return tmp; +} + +/* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */ + +void +set_FIELDWIDTHS() +{ + register char *scan; + char *end; + register int i; + static int fw_alloc = 1; + static int warned = FALSE; + extern double strtod(); + + if (do_lint && ! warned) { + warned = TRUE; + warning("use of FIELDWIDTHS is a gawk extension"); + } + if (do_traditional) /* quick and dirty, does the trick */ + return; + + /* + * If changing the way fields are split, obey least-suprise + * semantics, and force $0 to be split totally. 
+ */ + if (fields_arr != NULL) + (void) get_field(HUGE - 1, 0); + + parse_field = fw_parse_field; + scan = force_string(FIELDWIDTHS_node->var_value)->stptr; + end = scan + 1; + if (FIELDWIDTHS == NULL) + emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); + FIELDWIDTHS[0] = 0; + for (i = 1; ; i++) { + if (i >= fw_alloc) { + fw_alloc *= 2; + erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); + } + FIELDWIDTHS[i] = (int) strtod(scan, &end); + if (end == scan) + break; + scan = end; + } + FIELDWIDTHS[i] = -1; +} + +void +set_FS_if_not_FIELDWIDTHS() +{ + if (parse_field != fw_parse_field) + set_FS(); +} + +/* set_FS --- handle things when FS is assigned to */ + +void +set_FS() +{ + char buf[10]; + NODE *fs; + static NODE *save_fs = NULL; + static NODE *save_rs = NULL; + + /* + * If changing the way fields are split, obey least-suprise + * semantics, and force $0 to be split totally. + */ + if (fields_arr != NULL) + (void) get_field(HUGE - 1, 0); + + if (save_fs && cmp_nodes(FS_node->var_value, save_fs) == 0 + && save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0) + return; + unref(save_fs); + save_fs = dupnode(FS_node->var_value); + unref(save_rs); + save_rs = dupnode(RS_node->var_value); + resave_fs = TRUE; + buf[0] = '\0'; + default_FS = FALSE; + if (FS_regexp) { + refree(FS_regexp); + FS_regexp = NULL; + } + fs = force_string(FS_node->var_value); + if (! 
do_traditional && fs->stlen == 0) + parse_field = null_parse_field; + else if (fs->stlen > 1) + parse_field = re_parse_field; + else if (RS_is_null) { + parse_field = sc_parse_field; + if (fs->stlen == 1) { + if (fs->stptr[0] == ' ') { + default_FS = TRUE; + strcpy(buf, "[ \t\n]+"); + } else if (fs->stptr[0] != '\n') + sprintf(buf, "[%c\n]", fs->stptr[0]); + } + } else { + if (do_posix) + parse_field = posix_def_parse_field; + else + parse_field = def_parse_field; + if (fs->stptr[0] == ' ' && fs->stlen == 1) + default_FS = TRUE; + else if (fs->stptr[0] != ' ' && fs->stlen == 1) { + if (! IGNORECASE) + parse_field = sc_parse_field; + else if (fs->stptr[0] == '\\') + /* yet another special case */ + strcpy(buf, "[\\\\]"); + else + sprintf(buf, "[%c]", fs->stptr[0]); + } + } + if (buf[0] != '\0') { + FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, TRUE); + parse_field = re_parse_field; + } else if (parse_field == re_parse_field) { + FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, TRUE); + } else + FS_regexp = NULL; +} + +/* using_fieldwidths --- is FS or FIELDWIDTHS in use? */ + +int +using_fieldwidths() +{ + return parse_field == fw_parse_field; +} + diff --git a/contrib/awk/gawkmisc.c b/contrib/awk/gawkmisc.c new file mode 100644 index 0000000..0707971 --- /dev/null +++ b/contrib/awk/gawkmisc.c @@ -0,0 +1,63 @@ +/* + * gawkmisc.c --- miscellanious gawk routines that are OS specific. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991 - 97 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" + +/* some old compilers don't grok #elif. sigh */ + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) +#include "gawkmisc.pc" +#else +#if defined(VMS) +#include "vms/gawkmisc.vms" +#else +#if defined(atarist) +#include "atari/gawkmisc.atr" +#else +#include "posix/gawkmisc.c" +#endif +#endif +#endif + +/* xmalloc --- provide this so that other GNU library routines work */ + +#if __STDC__ +typedef void *pointer; +#else +typedef char *pointer; +#endif + +extern pointer xmalloc P((size_t bytes)); /* get rid of gcc warning */ + +pointer +xmalloc(bytes) +size_t bytes; +{ + pointer p; + + emalloc(p, pointer, bytes, "xmalloc"); + + return p; +} diff --git a/contrib/awk/getopt.c b/contrib/awk/getopt.c new file mode 100644 index 0000000..eac576b --- /dev/null +++ b/contrib/awk/getopt.c @@ -0,0 +1,1000 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97 + Free Software Foundation, Inc. + + This file is part of the GNU C Library. Its master source is NOT part of + the C library, however. The master source lives in /gd/gnu/lib. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +#define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +#include +#endif + +#if !defined (__STDC__) || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2 +#include +#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +#define ELIDE_CODE +#endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +#include +#include +#endif /* GNU C library. 
*/ + +#ifdef VMS +#include +#if HAVE_STRING_H - 0 +#include +#endif +#endif + +#if defined (WIN32) && !defined (__CYGWIN32__) +/* It's not Unix, really. See? Capital letters. */ +#include +#define getpid() GetCurrentProcessId() +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. + When compiling libc, the _ macro is predefined. */ +#ifdef HAVE_LIBINTL_H +# include +# define _(msgid) gettext (msgid) +#else +# define _(msgid) (msgid) +#endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = NULL; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. 
*/ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. 
+ + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +#include +#define my_index strchr +#else + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +char *getenv (); + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +#if !defined (__STDC__) || !__STDC__ +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +#endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. 
*/ + +static const char *nonoption_flags; +static int nonoption_flags_len; + +static int original_argc; +static char *const *original_argv; + +/* Make sure the environment variable bash 2.0 puts in the environment + is valid for the getopt call we must make sure that the ARGV passed + to getopt is that one passed to the process. */ +static void store_args (int argc, char *const *argv) __attribute__ ((unused)); +static void +store_args (int argc, char *const *argv) +{ + /* XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ + original_argc = argc; + original_argv = argv; +} +text_set_element (__libc_subinit, store_args); +#endif + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined (__STDC__) && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. 
*/ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined (__STDC__) && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. 
*/ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#ifdef _LIBC + if (posixly_correct == NULL + && argc == original_argc && argv == original_argv) + { + /* Bash 2.0 puts a special variable in the environment for each + command it runs, specifying which ARGV elements are the results of + file name wildcard expansion and therefore should not be + considered as options. */ + char var[100]; + sprintf (var, "_%d_GNU_nonoption_argv_flags_", getpid ()); + nonoption_flags = getenv (var); + if (nonoption_flags == NULL) + nonoption_flags_len = 0; + else + nonoption_flags_len = strlen (nonoption_flags); + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. 
If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. 
*/ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + optarg = NULL; + + if (!__getopt_initialized || optind == 0) + { + optstring = _getopt_initialize (argc, argv, optstring); + optind = 1; /* Don't scan ARGV[0], the program name. */ + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#ifdef _LIBC +#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && nonoption_flags[optind] == '1')) +#else +#define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. 
*/ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + _("%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + _("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { + if (posixly_correct) + /* 1003.2 specifies the format of this message. 
*/ + fprintf (stderr, _("%s: illegal option -- %c\n"), + argv[0], c); + else + fprintf (stderr, _("%s: invalid option -- %c\n"), + argv[0], c); + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. 
*/ + ambig = 1; + } + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. 
*/ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == -1) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/contrib/awk/getopt.h b/contrib/awk/getopt.h new file mode 100644 index 0000000..7dad11b --- /dev/null +++ b/contrib/awk/getopt.h @@ -0,0 +1,133 @@ +/* Declarations for getopt. + Copyright (C) 1989,90,91,92,93,94,96,97 Free Software Foundation, Inc. + + This file is part of the GNU C Library. Its master source is NOT part of + the C library, however. The master source lives in /gd/gnu/lib. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Describe the long-named options requested by the application. 
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +#if defined (__STDC__) && __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +#if defined (__STDC__) && __STDC__ +#ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. 
*/ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* __GNU_LIBRARY__ */ +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GETOPT_H */ diff --git a/contrib/awk/getopt1.c b/contrib/awk/getopt1.c new file mode 100644 index 0000000..8347bb1 --- /dev/null +++ b/contrib/awk/getopt1.c @@ -0,0 +1,189 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987,88,89,90,91,92,93,94,96,97 Free Software Foundation, Inc. + + This file is part of the GNU C Library. Its master source is NOT part of + the C library, however. The master source lives in /gd/gnu/lib. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. 
If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "getopt.h" + +#if !defined (__STDC__) || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined (_LIBC) && defined (__GLIBC__) && __GLIBC__ >= 2 +#include +#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +#define ELIDE_CODE +#endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +#include +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. 
*/ + +int +getopt_long_only (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +#include + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/contrib/awk/io.c b/contrib/awk/io.c new file mode 100644 index 0000000..74d9a8d --- /dev/null +++ b/contrib/awk/io.c @@ -0,0 +1,1941 @@ +/* + * io.c --- routines for dealing with input and output and records + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" +#undef HAVE_MMAP /* for now, probably forever */ + +#ifdef HAVE_SYS_PARAM_H +#undef RE_DUP_MAX /* avoid spurious conflict w/regex.h */ +#include +#endif /* HAVE_SYS_PARAM_H */ + +#ifdef HAVE_SYS_WAIT_H +#include +#endif /* HAVE_SYS_WAIT_H */ + +#ifdef HAVE_MMAP +#include +#ifndef MAP_FAILED +#define MAP_FAILED ((caddr_t) -1) +#endif /* ! defined (MAP_FAILED) */ +#endif /* HAVE_MMAP */ + +#ifndef O_RDONLY +#include +#endif +#ifndef O_ACCMODE +#define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR) +#endif + +#include + +#if ! 
defined(S_ISREG) && defined(S_IFREG) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif + +#if ! defined(S_ISDIR) && defined(S_IFDIR) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif + +#ifndef ENFILE +#define ENFILE EMFILE +#endif + +#ifdef atarist +#include +#endif + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) +#define PIPES_SIMULATED +#endif + +static IOBUF *nextfile P((int skipping)); +static int inrec P((IOBUF *iop)); +static int iop_close P((IOBUF *iop)); +struct redirect *redirect P((NODE *tree, int *errflg)); +static void close_one P((void)); +static int close_redir P((struct redirect *rp, int exitwarn)); +#ifndef PIPES_SIMULATED +static int wait_any P((int interesting)); +#endif +static IOBUF *gawk_popen P((char *cmd, struct redirect *rp)); +static IOBUF *iop_open P((const char *file, const char *how, IOBUF *buf)); +static IOBUF *iop_alloc P((int fd, const char *name, IOBUF *buf)); +static int gawk_pclose P((struct redirect *rp)); +static int do_pathopen P((const char *file)); +static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode)); +#ifdef HAVE_MMAP +static int mmap_get_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode)); +#endif /* HAVE_MMAP */ +static int str2mode P((const char *mode)); +static void spec_setup P((IOBUF *iop, int len, int allocate)); +static int specfdopen P((IOBUF *iop, const char *name, const char *mode)); +static int pidopen P((IOBUF *iop, const char *name, const char *mode)); +static int useropen P((IOBUF *iop, const char *name, const char *mode)); + +#if defined (MSDOS) && !defined (__GO32__) +#include "popen.h" +#define popen(c, m) os_popen(c, m) +#define pclose(f) os_pclose(f) +#else +#if defined (OS2) /* OS/2, but not family mode */ +#if defined (_MSC_VER) +#define popen(c, m) _popen(c, m) +#define pclose(f) _pclose(f) +#endif +#else +extern FILE *popen(); +#endif +#endif + +static struct redirect *red_head = NULL; +static NODE *RS; +static Regexp 
*RS_regexp; + +int RS_is_null; + +extern int output_is_tty; +extern NODE *ARGC_node; +extern NODE *ARGV_node; +extern NODE *ARGIND_node; +extern NODE *ERRNO_node; +extern NODE **fields_arr; + +static jmp_buf filebuf; /* for do_nextfile() */ + +/* do_nextfile --- implement gawk "nextfile" extension */ + +void +do_nextfile() +{ + (void) nextfile(TRUE); + longjmp(filebuf, 1); +} + +/* nextfile --- move to the next input data file */ + +static IOBUF * +nextfile(skipping) +int skipping; +{ + static long i = 1; + static int files = 0; + NODE *arg; + static IOBUF *curfile = NULL; + static IOBUF mybuf; + const char *fname; + + if (skipping) { + if (curfile != NULL) + iop_close(curfile); + curfile = NULL; + return NULL; + } + if (curfile != NULL) { + if (curfile->cnt == EOF) { + (void) iop_close(curfile); + curfile = NULL; + } else + return curfile; + } + for (; i < (long) (ARGC_node->lnode->numbr); i++) { + arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)); + if (arg->stlen == 0) + continue; + arg->stptr[arg->stlen] = '\0'; + if (! do_traditional) { + unref(ARGIND_node->var_value); + ARGIND_node->var_value = make_number((AWKNUM) i); + } + if (! arg_assign(arg->stptr)) { + files++; + fname = arg->stptr; + curfile = iop_open(fname, "r", &mybuf); + if (curfile == NULL) + goto give_up; + curfile->flag |= IOP_NOFREE_OBJ; + /* This is a kludge. */ + unref(FILENAME_node->var_value); + FILENAME_node->var_value = dupnode(arg); + FNR = 0; + i++; + break; + } + } + if (files == 0) { + files++; + /* no args. 
-- use stdin */ + /* FNR is init'ed to 0 */ + FILENAME_node->var_value = make_string("-", 1); + fname = "-"; + curfile = iop_open(fname, "r", &mybuf); + if (curfile == NULL) + goto give_up; + curfile->flag |= IOP_NOFREE_OBJ; + } + return curfile; + + give_up: + fatal("cannot open file `%s' for reading (%s)", + fname, strerror(errno)); + /* NOTREACHED */ + return 0; +} + +/* set_FNR --- update internal FNR from awk variable */ + +void +set_FNR() +{ + FNR = (long) FNR_node->var_value->numbr; +} + +/* set_NR --- update internal NR from awk variable */ + +void +set_NR() +{ + NR = (long) NR_node->var_value->numbr; +} + +/* inrec --- This reads in a record from the input file */ + +static int +inrec(iop) +IOBUF *iop; +{ + char *begin; + register int cnt; + int retval = 0; + + if ((cnt = iop->cnt) != EOF) + cnt = (*(iop->getrec)) + (&begin, iop, RS->stptr[0], RS_regexp, NULL); + if (cnt == EOF) { + cnt = 0; + retval = 1; + } else { + NR += 1; + FNR += 1; + set_record(begin, cnt, TRUE); + } + + return retval; +} + +/* iop_close --- close an open IOP */ + +static int +iop_close(iop) +IOBUF *iop; +{ + int ret; + + if (iop == NULL) + return 0; + errno = 0; + +#ifdef _CRAY + /* Work around bug in UNICOS popen */ + if (iop->fd < 3) + ret = 0; + else +#endif + /* save these for re-use; don't free the storage */ + if ((iop->flag & IOP_IS_INTERNAL) != 0) { + iop->off = iop->buf; + iop->end = iop->buf + strlen(iop->buf); + iop->cnt = 0; + iop->secsiz = 0; + return 0; + } + + /* Don't close standard files or else crufty code elsewhere will lose */ + if (iop->fd == fileno(stdin) + || iop->fd == fileno(stdout) + || iop->fd == fileno(stderr) + || (iop->flag & IOP_MMAPPED) != 0) + ret = 0; + else + ret = close(iop->fd); + + if (ret == -1) + warning("close of fd %d (`%s') failed (%s)", iop->fd, + iop->name, strerror(errno)); + if ((iop->flag & IOP_NO_FREE) == 0) { + /* + * Be careful -- $0 may still reference the buffer even though + * an explicit close is being done; in the future, 
maybe we + * can do this a bit better. + */ + if (iop->buf) { + if ((fields_arr[0]->stptr >= iop->buf) + && (fields_arr[0]->stptr < (iop->buf + iop->secsiz + iop->size))) { + NODE *t; + + t = make_string(fields_arr[0]->stptr, + fields_arr[0]->stlen); + unref(fields_arr[0]); + fields_arr[0] = t; + reset_record(); + } + if ((iop->flag & IOP_MMAPPED) == 0) + free(iop->buf); +#ifdef HAVE_MMAP + else + (void) munmap(iop->buf, iop->size); +#endif + } + if ((iop->flag & IOP_NOFREE_OBJ) == 0) + free((char *) iop); + } + return ret == -1 ? 1 : 0; +} + +/* do_input --- the main input processing loop */ + +void +do_input() +{ + IOBUF *iop; + extern int exiting; + + (void) setjmp(filebuf); /* for `nextfile' */ + + while ((iop = nextfile(FALSE)) != NULL) { + if (inrec(iop) == 0) + while (interpret(expression_value) && inrec(iop) == 0) + continue; +#ifdef C_ALLOCA + /* recover any space from C based alloca */ + (void) alloca(0); +#endif + if (exiting) + break; + } +} + +/* redirect --- Redirection for printf and print commands */ + +struct redirect * +redirect(tree, errflg) +NODE *tree; +int *errflg; +{ + register NODE *tmp; + register struct redirect *rp; + register char *str; + int tflag = 0; + int outflag = 0; + const char *direction = "to"; + const char *mode; + int fd; + const char *what = NULL; + + switch (tree->type) { + case Node_redirect_append: + tflag = RED_APPEND; + /* FALL THROUGH */ + case Node_redirect_output: + outflag = (RED_FILE|RED_WRITE); + tflag |= outflag; + if (tree->type == Node_redirect_output) + what = ">"; + else + what = ">>"; + break; + case Node_redirect_pipe: + tflag = (RED_PIPE|RED_WRITE); + what = "|"; + break; + case Node_redirect_pipein: + tflag = (RED_PIPE|RED_READ); + what = "|"; + break; + case Node_redirect_input: + tflag = (RED_FILE|RED_READ); + what = "<"; + break; + default: + fatal("invalid tree type %d in redirect()", tree->type); + break; + } + tmp = tree_eval(tree->subnode); + if (do_lint && (tmp->flags & STR) == 0) + 
warning("expression in `%s' redirection only has numeric value", + what); + tmp = force_string(tmp); + str = tmp->stptr; + + if (str == NULL || *str == '\0') + fatal("expression for `%s' redirection has null string value", + what); + + if (do_lint + && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen))) + warning("filename `%s' for `%s' redirection may be result of logical expression", str, what); + for (rp = red_head; rp != NULL; rp = rp->next) + if (strlen(rp->value) == tmp->stlen + && STREQN(rp->value, str, tmp->stlen) + && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag + || (outflag != 0 + && (rp->flag & (RED_FILE|RED_WRITE)) == outflag))) + break; + if (rp == NULL) { + emalloc(rp, struct redirect *, sizeof(struct redirect), + "redirect"); + emalloc(str, char *, tmp->stlen+1, "redirect"); + memcpy(str, tmp->stptr, tmp->stlen); + str[tmp->stlen] = '\0'; + rp->value = str; + rp->flag = tflag; + rp->fp = NULL; + rp->iop = NULL; + rp->pid = 0; /* unlikely that we're worried about init */ + rp->status = 0; + /* maintain list in most-recently-used first order */ + if (red_head != NULL) + red_head->prev = rp; + rp->prev = NULL; + rp->next = red_head; + red_head = rp; + } else + str = rp->value; /* get \0 terminated string */ + while (rp->fp == NULL && rp->iop == NULL) { + if (rp->flag & RED_EOF) + /* + * encountered EOF on file or pipe -- must be cleared + * by explicit close() before reading more + */ + return rp; + mode = NULL; + errno = 0; + switch (tree->type) { + case Node_redirect_output: + mode = "w"; + if ((rp->flag & RED_USED) != 0) + mode = "a"; + break; + case Node_redirect_append: + mode = "a"; + break; + case Node_redirect_pipe: + /* synchronize output before new pipe */ + (void) flush_io(); + + if ((rp->fp = popen(str, "w")) == NULL) + fatal("can't open pipe (\"%s\") for output (%s)", + str, strerror(errno)); + rp->flag |= RED_NOBUF; + break; + case Node_redirect_pipein: + direction = "from"; + if (gawk_popen(str, rp) == NULL) + fatal("can't 
open pipe (\"%s\") for input (%s)", + str, strerror(errno)); + break; + case Node_redirect_input: + direction = "from"; + rp->iop = iop_open(str, "r", NULL); + break; + default: + cant_happen(); + } + if (mode != NULL) { + errno = 0; + fd = devopen(str, mode); + if (fd > INVALID_HANDLE) { + if (fd == fileno(stdin)) + rp->fp = stdin; + else if (fd == fileno(stdout)) + rp->fp = stdout; + else if (fd == fileno(stderr)) + rp->fp = stderr; + else { + rp->fp = fdopen(fd, (char *) mode); + /* don't leak file descriptors */ + if (rp->fp == NULL) + close(fd); + } + if (rp->fp != NULL && isatty(fd)) + rp->flag |= RED_NOBUF; + } + } + if (rp->fp == NULL && rp->iop == NULL) { + /* too many files open -- close one and try again */ + if (errno == EMFILE || errno == ENFILE) + close_one(); +#ifdef HAVE_MMAP + /* this works for solaris 2.5, not sunos */ + else if (errno == 0) /* HACK! */ + close_one(); +#endif + else { + /* + * Some other reason for failure. + * + * On redirection of input from a file, + * just return an error, so e.g. getline + * can return -1. For output to file, + * complain. The shell will complain on + * a bad command to a pipe. 
+ */ + if (errflg != NULL) + *errflg = errno; + if (tree->type == Node_redirect_output + || tree->type == Node_redirect_append) + fatal("can't redirect %s `%s' (%s)", + direction, str, strerror(errno)); + else { + free_temp(tmp); + return NULL; + } + } + } + } + free_temp(tmp); + return rp; +} + +/* getredirect --- find the struct redirect for this file or pipe */ + +struct redirect * +getredirect(str, len) +char *str; +int len; +{ + struct redirect *rp; + + for (rp = red_head; rp != NULL; rp = rp->next) + if (strlen(rp->value) == len && STREQN(rp->value, str, len)) + return rp; + + return NULL; +} + +/* close_one --- temporarily close an open file to re-use the fd */ + +static void +close_one() +{ + register struct redirect *rp; + register struct redirect *rplast = NULL; + + /* go to end of list first, to pick up least recently used entry */ + for (rp = red_head; rp != NULL; rp = rp->next) + rplast = rp; + /* now work back up through the list */ + for (rp = rplast; rp != NULL; rp = rp->prev) + if (rp->fp != NULL && (rp->flag & RED_FILE) != 0) { + rp->flag |= RED_USED; + errno = 0; + if (/* do_lint && */ fclose(rp->fp) != 0) + warning("close of \"%s\" failed (%s).", + rp->value, strerror(errno)); + rp->fp = NULL; + break; + } + if (rp == NULL) + /* surely this is the only reason ??? */ + fatal("too many pipes or input files open"); +} + +/* do_close --- completely close an open file or pipe */ + +NODE * +do_close(tree) +NODE *tree; +{ + NODE *tmp; + register struct redirect *rp; + + tmp = force_string(tree_eval(tree->subnode)); + + /* icky special case: close(FILENAME) called. 
*/ + if (tree->subnode == FILENAME_node + || (tmp->stlen == FILENAME_node->var_value->stlen + && STREQN(tmp->stptr, FILENAME_node->var_value->stptr, tmp->stlen))) { + (void) nextfile(TRUE); + free_temp(tmp); + return tmp_number((AWKNUM) 0.0); + } + + for (rp = red_head; rp != NULL; rp = rp->next) { + if (strlen(rp->value) == tmp->stlen + && STREQN(rp->value, tmp->stptr, tmp->stlen)) + break; + } + if (rp == NULL) { /* no match */ + if (do_lint) + warning("close: `%.*s' is not an open file or pipe", + tmp->stlen, tmp->stptr); + free_temp(tmp); + return tmp_number((AWKNUM) 0.0); + } + free_temp(tmp); + fflush(stdout); /* synchronize regular output */ + tmp = tmp_number((AWKNUM) close_redir(rp, FALSE)); + rp = NULL; + return tmp; +} + +/* close_redir --- close an open file or pipe */ + +static int +close_redir(rp, exitwarn) +register struct redirect *rp; +int exitwarn; +{ + int status = 0; + char *what; + + if (rp == NULL) + return 0; + if (rp->fp == stdout || rp->fp == stderr) + return 0; + errno = 0; + if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE)) + status = pclose(rp->fp); + else if (rp->fp != NULL) + status = fclose(rp->fp); + else if (rp->iop != NULL) { + if ((rp->flag & RED_PIPE) != 0) + status = gawk_pclose(rp); + else { + status = iop_close(rp->iop); + rp->iop = NULL; + } + } + + what = ((rp->flag & RED_PIPE) != 0) ? "pipe" : "file"; + + if (exitwarn) + warning("no explicit close of %s `%s' provided", + what, rp->value); + + /* SVR4 awk checks and warns about status of close */ + if (status != 0) { + char *s = strerror(errno); + + /* + * Too many people have complained about this. + * As of 2.15.6, it is now under lint control. + */ + if (do_lint) + warning("failure status (%d) on %s close of \"%s\" (%s)", + status, what, rp->value, s); + + if (! 
do_traditional) { + /* set ERRNO too so that program can get at it */ + unref(ERRNO_node->var_value); + ERRNO_node->var_value = make_string(s, strlen(s)); + } + } + if (rp->next != NULL) + rp->next->prev = rp->prev; + if (rp->prev != NULL) + rp->prev->next = rp->next; + else + red_head = rp->next; + free(rp->value); + free((char *) rp); + return status; +} + +/* flush_io --- flush all open output files */ + +int +flush_io() +{ + register struct redirect *rp; + int status = 0; + + errno = 0; + if (fflush(stdout)) { + warning("error writing standard output (%s)", strerror(errno)); + status++; + } + if (fflush(stderr)) { + warning("error writing standard error (%s)", strerror(errno)); + status++; + } + for (rp = red_head; rp != NULL; rp = rp->next) + /* flush both files and pipes, what the heck */ + if ((rp->flag & RED_WRITE) && rp->fp != NULL) { + if (fflush(rp->fp)) { + warning("%s flush of \"%s\" failed (%s).", + (rp->flag & RED_PIPE) ? "pipe" : + "file", rp->value, strerror(errno)); + status++; + } + } + return status; +} + +/* close_io --- close all open files, called when exiting */ + +int +close_io() +{ + register struct redirect *rp; + register struct redirect *next; + int status = 0; + + errno = 0; + for (rp = red_head; rp != NULL; rp = next) { + next = rp->next; + /* + * close_redir() will print a message if needed + * if do_lint, warn about lack of explicit close + */ + if (close_redir(rp, do_lint)) + status++; + rp = NULL; + } + /* + * Some of the non-Unix os's have problems doing an fclose + * on stdout and stderr. Since we don't really need to close + * them, we just flush them, and do that across the board. 
+ */ + if (fflush(stdout)) { + warning("error writing standard output (%s)", strerror(errno)); + status++; + } + if (fflush(stderr)) { + warning("error writing standard error (%s)", strerror(errno)); + status++; + } + return status; +} + +/* str2mode --- convert a string mode to an integer mode */ + +static int +str2mode(mode) +const char *mode; +{ + int ret; + + switch(mode[0]) { + case 'r': + ret = O_RDONLY; + break; + + case 'w': + ret = O_WRONLY|O_CREAT|O_TRUNC; + break; + + case 'a': + ret = O_WRONLY|O_APPEND|O_CREAT; + break; + + default: + ret = 0; /* lint */ + cant_happen(); + } + return ret; +} + +/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */ + +/* + * This separate version is still needed for output, since file and pipe + * output is done with stdio. iop_open() handles input with IOBUFs of + * more "special" files. Those files are not handled here since it makes + * no sense to use them for output. + */ + +int +devopen(name, mode) +const char *name, *mode; +{ + int openfd; + const char *cp; + char *ptr; + int flag = 0; + struct stat buf; + extern double strtod(); + + flag = str2mode(mode); + + if (STREQ(name, "-")) + openfd = fileno(stdin); + else + openfd = INVALID_HANDLE; + + if (do_traditional) + goto strictopen; + + if ((openfd = os_devopen(name, flag)) >= 0) + return openfd; + + if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) { + cp = name + 5; + + if (STREQ(cp, "stdin") && (flag & O_ACCMODE) == O_RDONLY) + openfd = fileno(stdin); + else if (STREQ(cp, "stdout") && (flag & O_ACCMODE) == O_WRONLY) + openfd = fileno(stdout); + else if (STREQ(cp, "stderr") && (flag & O_ACCMODE) == O_WRONLY) + openfd = fileno(stderr); + else if (STREQN(cp, "fd/", 3)) { + cp += 3; + openfd = (int) strtod(cp, &ptr); + if (openfd <= INVALID_HANDLE || ptr == cp) + openfd = INVALID_HANDLE; + } + } + +strictopen: + if (openfd == INVALID_HANDLE) + openfd = open(name, flag, 0666); + if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 
0) + if (S_ISDIR(buf.st_mode)) + fatal("file `%s' is a directory", name); + return openfd; +} + + +/* spec_setup --- setup an IOBUF for a special internal file */ + +static void +spec_setup(iop, len, allocate) +IOBUF *iop; +int len; +int allocate; +{ + char *cp; + + if (allocate) { + emalloc(cp, char *, len+2, "spec_setup"); + iop->buf = cp; + } else { + len = strlen(iop->buf); + iop->buf[len++] = '\n'; /* get_a_record clobbered it */ + iop->buf[len] = '\0'; /* just in case */ + } + iop->off = iop->buf; + iop->cnt = 0; + iop->secsiz = 0; + iop->size = len; + iop->end = iop->buf + len; + iop->fd = -1; + iop->flag = IOP_IS_INTERNAL; + iop->getrec = get_a_record; +} + +/* specfdopen --- open an fd special file */ + +static int +specfdopen(iop, name, mode) +IOBUF *iop; +const char *name, *mode; +{ + int fd; + IOBUF *tp; + + fd = devopen(name, mode); + if (fd == INVALID_HANDLE) + return INVALID_HANDLE; + tp = iop_alloc(fd, name, NULL); + if (tp == NULL) { + /* don't leak fd's */ + close(fd); + return INVALID_HANDLE; + } + *iop = *tp; + iop->flag |= IOP_NO_FREE; + free(tp); + return 0; +} + +#ifdef GETPGRP_VOID +#define getpgrp_arg() /* nothing */ +#else +#define getpgrp_arg() getpid() +#endif + +/* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */ + +static int +pidopen(iop, name, mode) +IOBUF *iop; +const char *name, *mode; +{ + char tbuf[BUFSIZ]; + int i; + + if (name[6] == 'g') + sprintf(tbuf, "%d\n", getpgrp(getpgrp_arg())); + else if (name[6] == 'i') + sprintf(tbuf, "%d\n", getpid()); + else + sprintf(tbuf, "%d\n", getppid()); + i = strlen(tbuf); + spec_setup(iop, i, TRUE); + strcpy(iop->buf, tbuf); + return 0; +} + +/* useropen --- "open" /dev/user */ + +/* + * /dev/user creates a record as follows: + * $1 = getuid() + * $2 = geteuid() + * $3 = getgid() + * $4 = getegid() + * If multiple groups are supported, then $5 through $NF are the + * supplementary group set. 
+ */ + +static int +useropen(iop, name, mode) +IOBUF *iop; +const char *name, *mode; +{ + char tbuf[BUFSIZ], *cp; + int i; +#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 + GETGROUPS_T groupset[NGROUPS_MAX]; + int ngroups; +#endif + + sprintf(tbuf, "%d %d %d %d", getuid(), geteuid(), getgid(), getegid()); + + cp = tbuf + strlen(tbuf); +#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 + ngroups = getgroups(NGROUPS_MAX, groupset); + if (ngroups == -1) + fatal("could not find groups: %s", strerror(errno)); + + for (i = 0; i < ngroups; i++) { + *cp++ = ' '; + sprintf(cp, "%d", (int) groupset[i]); + cp += strlen(cp); + } +#endif + *cp++ = '\n'; + *cp++ = '\0'; + + i = strlen(tbuf); + spec_setup(iop, i, TRUE); + strcpy(iop->buf, tbuf); + return 0; +} + +/* iop_open --- handle special and regular files for input */ + +static IOBUF * +iop_open(name, mode, iop) +const char *name, *mode; +IOBUF *iop; +{ + int openfd = INVALID_HANDLE; + int flag = 0; + struct stat buf; + static struct internal { + const char *name; + int compare; + int (*fp) P((IOBUF *, const char *, const char *)); + IOBUF iob; + } table[] = { + { "/dev/fd/", 8, specfdopen }, + { "/dev/stdin", 10, specfdopen }, + { "/dev/stdout", 11, specfdopen }, + { "/dev/stderr", 11, specfdopen }, + { "/dev/pid", 8, pidopen }, + { "/dev/ppid", 9, pidopen }, + { "/dev/pgrpid", 11, pidopen }, + { "/dev/user", 9, useropen }, + }; + int devcount = sizeof(table) / sizeof(table[0]); + + flag = str2mode(mode); + + /* + * FIXME: remove the stat call, and always process these files + * internally. 
+ */ + if (STREQ(name, "-")) + openfd = fileno(stdin); + else if (do_traditional) + goto strictopen; + else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) { + int i; + + for (i = 0; i < devcount; i++) { + if (STREQN(name, table[i].name, table[i].compare)) { + iop = & table[i].iob; + + if (iop->buf != NULL) { + spec_setup(iop, 0, FALSE); + return iop; + } else if ((*table[i].fp)(iop, name, mode) == 0) + return iop; + else { + warning("could not open %s, mode `%s'", + name, mode); + return NULL; + } + } + } + } + +strictopen: + if (openfd == INVALID_HANDLE) + openfd = open(name, flag, 0666); + if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0) + if ((buf.st_mode & S_IFMT) == S_IFDIR) + fatal("file `%s' is a directory", name); + return iop_alloc(openfd, name, iop); +} + +#ifndef PIPES_SIMULATED /* real pipes */ + +/* wait_any --- wait for a child process, close associated pipe */ + +static int +wait_any(interesting) +int interesting; /* pid of interest, if any */ +{ + RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)(); + int pid; + int status = 0; + struct redirect *redp; + extern int errno; + + hstat = signal(SIGHUP, SIG_IGN); + istat = signal(SIGINT, SIG_IGN); + qstat = signal(SIGQUIT, SIG_IGN); + for (;;) { +#ifdef HAVE_SYS_WAIT_H /* Posix compatible sys/wait.h */ + pid = wait(&status); +#else + pid = wait((union wait *)&status); +#endif /* NeXT */ + if (interesting && pid == interesting) { + break; + } else if (pid != -1) { + for (redp = red_head; redp != NULL; redp = redp->next) + if (pid == redp->pid) { + redp->pid = -1; + redp->status = status; + break; + } + } + if (pid == -1 && errno == ECHILD) + break; + } + signal(SIGHUP, hstat); + signal(SIGINT, istat); + signal(SIGQUIT, qstat); + return(status); +} + +/* gawk_popen --- open an IOBUF on a child process */ + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + int p[2]; + register int pid; + + /* + * used to wait for any children to synchronize input and output, + * 
but this could cause gawk to hang when it is started in a pipeline + * and thus has a child process feeding it input (shell dependant) + */ + /*(void) wait_any(0);*/ /* wait for outstanding processes */ + + if (pipe(p) < 0) + fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno)); + if ((pid = fork()) == 0) { + if (close(1) == -1) + fatal("close of stdout in child failed (%s)", + strerror(errno)); + if (dup(p[1]) != 1) + fatal("dup of pipe failed (%s)", strerror(errno)); + if (close(p[0]) == -1 || close(p[1]) == -1) + fatal("close of pipe failed (%s)", strerror(errno)); + execl("/bin/sh", "sh", "-c", cmd, NULL); + _exit(127); + } + if (pid == -1) + fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno)); + rp->pid = pid; + if (close(p[1]) == -1) + fatal("close of pipe failed (%s)", strerror(errno)); + rp->iop = iop_alloc(p[0], cmd, NULL); + if (rp->iop == NULL) + (void) close(p[0]); + return (rp->iop); +} + +/* gawk_pclose --- close an open child pipe */ + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + (void) iop_close(rp->iop); + rp->iop = NULL; + + /* process previously found, return stored status */ + if (rp->pid == -1) + return (rp->status >> 8) & 0xFF; + rp->status = wait_any(rp->pid); + rp->pid = -1; + return (rp->status >> 8) & 0xFF; +} + +#else /* PIPES_SIMULATED */ + +/* + * use temporary file rather than pipe + * except if popen() provides real pipes too + */ + +#if defined(VMS) || defined(OS2) || defined (MSDOS) + +/* gawk_popen --- open an IOBUF on a child process */ + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + FILE *current; + + if ((current = popen(cmd, "r")) == NULL) + return NULL; + rp->iop = iop_alloc(fileno(current), cmd, NULL); + if (rp->iop == NULL) { + (void) fclose(current); + current = NULL; + } + rp->ifp = current; + return (rp->iop); +} + +/* gawk_pclose --- close an open child pipe */ + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + int rval, aval, fd = rp->iop->fd; + + rp->iop->fd 
= dup(fd); /* kludge to allow close() + pclose() */ + rval = iop_close(rp->iop); + rp->iop = NULL; + aval = pclose(rp->ifp); + rp->ifp = NULL; + return (rval < 0 ? rval : aval); +} +#else /* not (VMS || OS2 || MSDOS) */ + +static struct pipeinfo { + char *command; + char *name; +} pipes[_NFILE]; + +/* gawk_popen --- open an IOBUF on a child process */ + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + extern char *strdup P((const char *)); + int current; + char *name; + static char cmdbuf[256]; + + /* get a name to use */ + if ((name = tempnam(".", "pip")) == NULL) + return NULL; + sprintf(cmdbuf, "%s > %s", cmd, name); + system(cmdbuf); + if ((current = open(name, O_RDONLY)) == INVALID_HANDLE) + return NULL; + pipes[current].name = name; + pipes[current].command = strdup(cmd); + rp->iop = iop_alloc(current, name, NULL); + if (rp->iop == NULL) + (void) close(current); + return (rp->iop); +} + +/* gawk_pclose --- close an open child pipe */ + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + int cur = rp->iop->fd; + int rval; + + rval = iop_close(rp->iop); + rp->iop = NULL; + + /* check for an open file */ + if (pipes[cur].name == NULL) + return -1; + unlink(pipes[cur].name); + free(pipes[cur].name); + pipes[cur].name = NULL; + free(pipes[cur].command); + return rval; +} +#endif /* not (VMS || OS2 || MSDOS) */ + +#endif /* PIPES_SIMULATED */ + +/* do_getline --- read in a line, into var and with redirection, as needed */ + +NODE * +do_getline(tree) +NODE *tree; +{ + struct redirect *rp = NULL; + IOBUF *iop; + int cnt = EOF; + char *s = NULL; + int errcode; + + while (cnt == EOF) { + if (tree->rnode == NULL) { /* no redirection */ + iop = nextfile(FALSE); + if (iop == NULL) /* end of input */ + return tmp_number((AWKNUM) 0.0); + } else { + int redir_error = 0; + + rp = redirect(tree->rnode, &redir_error); + if (rp == NULL && redir_error) { /* failed redirect */ + if (! 
do_traditional) { + s = strerror(redir_error); + + unref(ERRNO_node->var_value); + ERRNO_node->var_value = + make_string(s, strlen(s)); + } + return tmp_number((AWKNUM) -1.0); + } + iop = rp->iop; + if (iop == NULL) /* end of input */ + return tmp_number((AWKNUM) 0.0); + } + errcode = 0; + cnt = (*(iop->getrec))(&s, iop, RS->stptr[0], RS_regexp, &errcode); + if (errcode != 0) { + if (! do_traditional) { + s = strerror(errcode); + + unref(ERRNO_node->var_value); + ERRNO_node->var_value = make_string(s, strlen(s)); + } + return tmp_number((AWKNUM) -1.0); + } + if (cnt == EOF) { + if (rp != NULL) { + /* + * Don't do iop_close() here if we are + * reading from a pipe; otherwise + * gawk_pclose will not be called. + */ + if ((rp->flag & RED_PIPE) == 0) { + (void) iop_close(iop); + rp->iop = NULL; + } + rp->flag |= RED_EOF; /* sticky EOF */ + return tmp_number((AWKNUM) 0.0); + } else + continue; /* try another file */ + } + if (rp == NULL) { + NR++; + FNR++; + } + if (tree->lnode == NULL) /* no optional var. */ + set_record(s, cnt, TRUE); + else { /* assignment to variable */ + Func_ptr after_assign = NULL; + NODE **lhs; + + lhs = get_lhs(tree->lnode, &after_assign); + unref(*lhs); + *lhs = make_string(s, cnt); + (*lhs)->flags |= MAYBE_NUM; + /* we may have to regenerate $0 here! */ + if (after_assign != NULL) + (*after_assign)(); + } + } + return tmp_number((AWKNUM) 1.0); +} + +/* pathopen --- pathopen with default file extension handling */ + +int +pathopen(file) +const char *file; +{ + int fd = do_pathopen(file); + +#ifdef DEFAULT_FILETYPE + if (! 
do_traditional && fd <= INVALID_HANDLE) { + char *file_awk; + int save = errno; +#ifdef VMS + int vms_save = vaxc$errno; +#endif + + /* append ".awk" and try again */ + emalloc(file_awk, char *, strlen(file) + + sizeof(DEFAULT_FILETYPE) + 1, "pathopen"); + sprintf(file_awk, "%s%s", file, DEFAULT_FILETYPE); + fd = do_pathopen(file_awk); + free(file_awk); + if (fd <= INVALID_HANDLE) { + errno = save; +#ifdef VMS + vaxc$errno = vms_save; +#endif + } + } +#endif /*DEFAULT_FILETYPE*/ + + return fd; +} + +/* do_pathopen --- search $AWKPATH for source file */ + +static int +do_pathopen(file) +const char *file; +{ + static const char *savepath = NULL; + static int first = TRUE; + const char *awkpath; + char *cp, trypath[BUFSIZ]; + int fd; + + if (STREQ(file, "-")) + return (0); + + if (do_traditional) + return (devopen(file, "r")); + + if (first) { + first = FALSE; + if ((awkpath = getenv("AWKPATH")) != NULL && *awkpath) + savepath = awkpath; /* used for restarting */ + else + savepath = defpath; + } + awkpath = savepath; + + /* some kind of path name, no search */ + if (ispath(file)) + return (devopen(file, "r")); + + do { + trypath[0] = '\0'; + /* this should take into account limits on size of trypath */ + for (cp = trypath; *awkpath && *awkpath != envsep; ) + *cp++ = *awkpath++; + + if (cp != trypath) { /* nun-null element in path */ + /* add directory punctuation only if needed */ + if (! isdirpunct(*(cp-1))) + *cp++ = '/'; + /* append filename */ + strcpy(cp, file); + } else + strcpy(trypath, file); + if ((fd = devopen(trypath, "r")) > INVALID_HANDLE) + return (fd); + + /* no luck, keep going */ + if(*awkpath == envsep && awkpath[1] != '\0') + awkpath++; /* skip colon */ + } while (*awkpath != '\0'); + /* + * You might have one of the awk paths defined, WITHOUT the current + * working directory in it. Therefore try to open the file in the + * current directory. 
+ */ + return (devopen(file, "r")); +} + +#ifdef TEST +int bufsize = 8192; + +void +fatal(s) +char *s; +{ + printf("%s\n", s); + exit(1); +} +#endif + +/* iop_alloc --- allocate an IOBUF structure for an open fd */ + +static IOBUF * +iop_alloc(fd, name, iop) +int fd; +const char *name; +IOBUF *iop; +{ + struct stat sbuf; + + if (fd == INVALID_HANDLE) + return NULL; + if (iop == NULL) + emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc"); + iop->flag = 0; + if (isatty(fd)) + iop->flag |= IOP_IS_TTY; + iop->size = optimal_bufsize(fd, & sbuf); + if (do_lint && S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) + warning("data file `%s' is empty", name); + iop->secsiz = -2; + errno = 0; + iop->fd = fd; + iop->off = iop->buf = NULL; + iop->cnt = 0; + iop->name = name; + iop->getrec = get_a_record; +#ifdef HAVE_MMAP + if (S_ISREG(sbuf.st_mode) && sbuf.st_size > 0) { + register char *cp; + + iop->buf = iop->off = mmap((caddr_t) 0, sbuf.st_size, + PROT_READ|PROT_WRITE, MAP_PRIVATE, + fd, 0L); + /* cast is for buggy compilers (e.g. DEC OSF/1) */ + if (iop->buf == (caddr_t)MAP_FAILED) { + iop->buf = iop->off = NULL; + goto out; + } + + iop->flag |= IOP_MMAPPED; + iop->size = sbuf.st_size; + iop->secsiz = 0; + iop->end = iop->buf + iop->size; + iop->cnt = sbuf.st_size; + iop->getrec = mmap_get_record; + (void) close(fd); + iop->fd = INVALID_HANDLE; + +#if defined(HAVE_MADVISE) && defined(MADV_SEQUENTIAL) + madvise(iop->buf, iop->size, MADV_SEQUENTIAL); +#endif + /* + * The following is a really gross hack. + * We want to ensure that we have a copy of the input + * data that won't go away, on the off chance that someone + * will truncate the data file we've just mmap'ed. + * So, we go through and touch each page, forcing the + * system to give us a private copy. A page size of 512 + * guarantees this will work, even on the least common + * denominator system (like, oh say, a VAX). 
+ */ + for (cp = iop->buf; cp < iop->end; cp += 512) + *cp = *cp; + } +out: +#endif /* HAVE_MMAP */ + return iop; +} + +/* These macros used by both record reading routines */ +#define set_RT_to_null() \ + (void)(! do_traditional && (unref(RT_node->var_value), \ + RT_node->var_value = Nnull_string)) + +#define set_RT(str, len) \ + (void)(! do_traditional && (unref(RT_node->var_value), \ + RT_node->var_value = make_string(str, len))) + +/* + * get_a_record: + * Get the next record. Uses a "split buffer" where the latter part is + * the normal read buffer and the head part is an "overflow" area that is used + * when a record spans the end of the normal buffer, in which case the first + * part of the record is copied into the overflow area just before the + * normal buffer. Thus, the eventual full record can be returned as a + * contiguous area of memory with a minimum of copying. The overflow area + * is expanded as needed, so that records are unlimited in length. + * We also mark both the end of the buffer and the end of the read() with + * a sentinel character (the current record separator) so that the inside + * loop can run as a single test. + * + * Note that since we know or can compute the end of the read and the end + * of the buffer, the sentinel character does not get in the way of regexp + * based searching, since we simply search up to that character, but not + * including it. 
+ */ + +static int +get_a_record(out, iop, grRS, RSre, errcode) +char **out; /* pointer to pointer to data */ +IOBUF *iop; /* input IOP */ +register int grRS; /* first char in RS->stptr */ +Regexp *RSre; /* regexp for RS */ +int *errcode; /* pointer to error variable */ +{ + register char *bp = iop->off; + char *bufend; + char *start = iop->off; /* beginning of record */ + int rs; + static Regexp *RS_null_re = NULL; + Regexp *rsre = NULL; + int continuing = FALSE, continued = FALSE; /* used for re matching */ + int onecase; + + /* first time through */ + if (RS_null_re == NULL) { + RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE); + if (RS_null_re == NULL) + fatal("internal error: file `%s', line %d\n", + __FILE__, __LINE__); + } + + if (iop->cnt == EOF) { /* previous read hit EOF */ + *out = NULL; + set_RT_to_null(); + return EOF; + } + + if (grRS == FALSE) /* special case: RS == "" */ + rs = '\n'; + else + rs = (char) grRS; + + onecase = (IGNORECASE && isalpha(rs)); + if (onecase) + rs = casetable[rs]; + + /* set up sentinel */ + if (iop->buf) { + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; /* add sentinel to buffer */ + } else + bufend = NULL; + + for (;;) { /* break on end of record, read error or EOF */ +/* buffer mgmt, chunk #1 */ + /* + * Following code is entered on the first call of this routine + * for a new iop, or when we scan to the end of the buffer. + * In the latter case, we copy the current partial record to + * the space preceding the normal read buffer. If necessary, + * we expand this space. This is done so that we can return + * the record as a contiguous area of memory. 
+ */ + if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) { + char *oldbuf = NULL; + char *oldsplit = iop->buf + iop->secsiz; + long len; /* record length so far */ + + len = bp - start; + if (len > iop->secsiz) { + /* expand secondary buffer */ + if (iop->secsiz == -2) + iop->secsiz = 256; + while (len > iop->secsiz) + iop->secsiz *= 2; + oldbuf = iop->buf; + emalloc(iop->buf, char *, + iop->size+iop->secsiz+2, "get_a_record"); + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; + } + if (len > 0) { + char *newsplit = iop->buf + iop->secsiz; + + if (start < oldsplit) { + memcpy(newsplit - len, start, + oldsplit - start); + memcpy(newsplit - (bp - oldsplit), + oldsplit, bp - oldsplit); + } else + memcpy(newsplit - len, start, len); + } + bp = iop->end = iop->off = iop->buf + iop->secsiz; + start = bp - len; + if (oldbuf != NULL) { + free(oldbuf); + oldbuf = NULL; + } + } +/* buffer mgmt, chunk #2 */ + /* + * Following code is entered whenever we have no more data to + * scan. In most cases this will read into the beginning of + * the main buffer, but in some cases (terminal, pipe etc.) + * we may be doing smallish reads into more advanced positions. + */ + if (bp >= iop->end) { + if ((iop->flag & IOP_IS_INTERNAL) != 0) { + iop->cnt = EOF; + break; + } + iop->cnt = read(iop->fd, iop->end, bufend - iop->end); + if (iop->cnt == -1) { + if (! do_traditional && errcode != NULL) { + *errcode = errno; + iop->cnt = EOF; + break; + } else + fatal("error reading input file `%s': %s", + iop->name, strerror(errno)); + } else if (iop->cnt == 0) { + /* + * hit EOF before matching RS, so end + * the record and set RT to "" + */ + iop->cnt = EOF; + /* see comments below about this test */ + if (! 
continuing) { + set_RT_to_null(); + break; + } + } + if (iop->cnt != EOF) { + iop->end += iop->cnt; + *iop->end = rs; /* reset the sentinel */ + } + } +/* buffers are now setup and filled with data */ +/* search for RS, #1, regexp based, or RS = "" */ + /* + * Attempt to simplify the code a bit. The case where + * RS = "" can also be described by a regexp, RS = "\n\n+". + * The buffer managment and searching code can thus now + * use a common case (the one for regexps) both when RS is + * a regexp, and when RS = "". This particularly benefits + * us for keeping track of how many newlines were matched + * in order to set RT. + */ + if (! do_traditional && RSre != NULL) /* regexp */ + rsre = RSre; + else if (grRS == FALSE) /* RS = "" */ + rsre = RS_null_re; + else + rsre = NULL; + + /* + * Look for regexp match of RS. Non-match conditions are: + * 1. No match at all + * 2. Match of a null string + * 3. Match ends at exact end of buffer + * Number 3 is subtle; we have to add more to the buffer + * in case the match would have extended further into the + * file, since regexp match by definition always matches the + * longest possible match. + * + * It is even more subtle than you might think. Suppose + * the re matches at exactly the end of file. We don't know + * that until we try to add more to the buffer. Thus, we + * set a flag to indicate, that if eof really does happen, + * don't break early. + */ + continuing = FALSE; + if (rsre != NULL) { + again: + /* cases 1 and 2 are simple, just keep going */ + if (research(rsre, start, 0, iop->end - start, TRUE) == -1 + || RESTART(rsre, start) == REEND(rsre, start)) { + bp = iop->end; + continue; + } + /* case 3, regex match at exact end */ + if (start + REEND(rsre, start) >= iop->end) { + if (iop->cnt != EOF) { + bp = iop->end; + continuing = continued = TRUE; + continue; + } + } + /* got a match! */ + /* + * Leading newlines at the beginning of the file + * should be ignored. Whew! 
+ */ + if (grRS == FALSE && RESTART(rsre, start) == 0) { + start += REEND(rsre, start); + goto again; + } + bp = start + RESTART(rsre, start); + set_RT(bp, REEND(rsre, start) - RESTART(rsre, start)); + *bp = '\0'; + iop->off = start + REEND(rsre, start); + break; + } +/* search for RS, #2, RS = */ + if (onecase) { + while (casetable[(int) *bp++] != rs) + continue; + } else { + while (*bp++ != rs) + continue; + } + set_RT(bp - 1, 1); + + if (bp <= iop->end) + break; + else + bp--; + + if ((iop->flag & IOP_IS_INTERNAL) != 0) + iop->cnt = bp - start; + } + if (iop->cnt == EOF + && (((iop->flag & IOP_IS_INTERNAL) != 0) + || (start == bp && ! continued))) { + *out = NULL; + set_RT_to_null(); + return EOF; + } + + if (do_traditional || rsre == NULL) { + char *bstart; + + bstart = iop->off = bp; + bp--; + if (onecase ? casetable[(int) *bp] != rs : *bp != rs) { + bp++; + bstart = bp; + } + *bp = '\0'; + } else if (grRS == FALSE && iop->cnt == EOF) { + /* + * special case, delete trailing newlines, + * should never be more than one. 
+ */ + while (bp[-1] == '\n') + bp--; + *bp = '\0'; + } + + *out = start; + return bp - start; +} + +#ifdef TEST +int +main(argc, argv) +int argc; +char *argv[]; +{ + IOBUF *iop; + char *out; + int cnt; + char rs[2]; + + rs[0] = '\0'; + if (argc > 1) + bufsize = atoi(argv[1]); + if (argc > 2) + rs[0] = *argv[2]; + iop = iop_alloc(0, "stdin", NULL); + while ((cnt = get_a_record(&out, iop, rs[0], NULL, NULL)) > 0) { + fwrite(out, 1, cnt, stdout); + fwrite(rs, 1, 1, stdout); + } + return 0; +} +#endif + +#ifdef HAVE_MMAP +/* mmap_get_record --- pull a record out of a memory-mapped file */ + +static int +mmap_get_record(out, iop, grRS, RSre, errcode) +char **out; /* pointer to pointer to data */ +IOBUF *iop; /* input IOP */ +register int grRS; /* first char in RS->stptr */ +Regexp *RSre; /* regexp for RS */ +int *errcode; /* pointer to error variable */ +{ + register char *bp = iop->off; + char *start = iop->off; /* beginning of record */ + int rs; + static Regexp *RS_null_re = NULL; + Regexp *rsre = NULL; + int onecase; + register char *end = iop->end; + int cnt; + + /* first time through */ + if (RS_null_re == NULL) { + RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE); + if (RS_null_re == NULL) + fatal("internal error: file `%s', line %d\n", + __FILE__, __LINE__); + } + + if (iop->off >= iop->end) { /* previous record was last */ + *out = NULL; + set_RT_to_null(); + iop->cnt = EOF; /* tested by higher level code */ + return EOF; + } + + if (grRS == FALSE) /* special case: RS == "" */ + rs = '\n'; + else + rs = (char) grRS; + + onecase = (IGNORECASE && isalpha(rs)); + if (onecase) + rs = casetable[rs]; + + /* if RS = "", skip leading newlines at the front of the file */ + if (grRS == FALSE && iop->off == iop->buf) { + for (bp = iop->off; *bp == '\n'; bp++) + continue; + + if (bp != iop->off) + iop->off = start = bp; + } + + /* + * Regexp based searching. Either RS = "" or RS = + * See comments in get_a_record. + */ + if (! 
do_traditional && RSre != NULL) /* regexp */ + rsre = RSre; + else if (grRS == FALSE) /* RS = "" */ + rsre = RS_null_re; + else + rsre = NULL; + + /* + * Look for regexp match of RS. Non-match conditions are: + * 1. No match at all + * 2. Match of a null string + * 3. Match ends at exact end of buffer + * + * #1 means that the record ends the file + * and there is no text that actually matched RS. + * + * #2: is probably like #1. + * + * #3 is simple; since we have the whole file mapped, it's + * the last record in the file. + */ + if (rsre != NULL) { + if (research(rsre, start, 0, iop->end - start, TRUE) == -1 + || RESTART(rsre, start) == REEND(rsre, start)) { + /* no matching text, we have the record */ + *out = start; + iop->off = iop->end; /* all done with the record */ + set_RT_to_null(); + /* special case, don't allow trailing newlines */ + if (grRS == FALSE && *(iop->end - 1) == '\n') + return iop->end - start - 1; + else + return iop->end - start; + + } + /* have a match */ + *out = start; + bp = start + RESTART(rsre, start); + set_RT(bp, REEND(rsre, start) - RESTART(rsre, start)); + *bp = '\0'; + iop->off = start + REEND(rsre, start); + return bp - start; + } + + /* + * RS = "?", i.e., one character based searching. + * + * Alas, we can't just plug the sentinel character in at + * the end of the mmapp'ed file ( *(iop->end) = rs; ). This + * works if we're lucky enough to have a file that does not + * take up all of its last disk block. But if we end up with + * file whose size is an even multiple of the disk block size, + * assigning past the end of it delivers a SIGBUS. So, we have to + * add the extra test in the while loop at the front that looks + * for going past the end of the mapped object. Sigh. 
+ */ + /* search for RS, #2, RS = */ + if (onecase) { + while (bp < end && casetable[*bp++] != rs) + continue; + } else { + while (bp < end && *bp++ != rs) + continue; + } + cnt = (bp - start) - 1; + if (bp >= iop->end) { + /* at end, may have actually seen rs, or may not */ + if (*(bp-1) == rs) + set_RT(bp - 1, 1); /* real RS seen */ + else { + cnt++; + set_RT_to_null(); + } + } else + set_RT(bp - 1, 1); + + iop->off = bp; + *out = start; + return cnt; +} +#endif /* HAVE_MMAP */ + +/* set_RS --- update things as appropriate when RS is set */ + +void +set_RS() +{ + static NODE *save_rs = NULL; + + if (save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0) + return; + unref(save_rs); + save_rs = dupnode(RS_node->var_value); + RS_is_null = FALSE; + RS = force_string(RS_node->var_value); + if (RS_regexp != NULL) { + refree(RS_regexp); + RS_regexp = NULL; + } + if (RS->stlen == 0) + RS_is_null = TRUE; + else if (RS->stlen > 1) + RS_regexp = make_regexp(RS->stptr, RS->stlen, IGNORECASE, TRUE); + + set_FS_if_not_FIELDWIDTHS(); +} diff --git a/contrib/awk/main.c b/contrib/awk/main.c new file mode 100644 index 0000000..92445de --- /dev/null +++ b/contrib/awk/main.c @@ -0,0 +1,735 @@ +/* + * main.c -- Expression tree constructors and main program for gawk. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" +#include "getopt.h" +#include "patchlevel.h" + +static void usage P((int exitval, FILE *fp)); +static void copyleft P((void)); +static void cmdline_fs P((char *str)); +static void init_args P((int argc0, int argc, char *argv0, char **argv)); +static void init_vars P((void)); +static void pre_assign P((char *v)); +RETSIGTYPE catchsig P((int sig, int code)); +static void nostalgia P((void)); +static void version P((void)); + +/* These nodes store all the special variables AWK uses */ +NODE *ARGC_node, *ARGIND_node, *ARGV_node, *CONVFMT_node, *ENVIRON_node; +NODE *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node, *FS_node; +NODE *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node; +NODE *ORS_node, *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node; + +long NF; +long NR; +long FNR; +int IGNORECASE; +char *OFS; +char *ORS; +char *OFMT; + +/* + * CONVFMT is a convenience pointer for the current number to string format. + * We must supply an initial value to avoid recursion problems of + * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT + * Fun, fun, fun, fun. + */ +char *CONVFMT = "%.6g"; + +int errcount = 0; /* error counter, used by yyerror() */ + +NODE *Nnull_string; /* The global null string */ + +/* The name the program was invoked under, for error messages */ +const char *myname; + +/* A block of AWK code to be run before running the program */ +NODE *begin_block = NULL; + +/* A block of AWK code to be run after the last input file */ +NODE *end_block = NULL; + +int exiting = FALSE; /* Was an "exit" statement executed? 
*/ +int exit_val = 0; /* optional exit value */ + +#if defined(YYDEBUG) || defined(DEBUG) +extern int yydebug; +#endif + +struct src *srcfiles = NULL; /* source file name(s) */ +long numfiles = -1; /* how many source files */ + +int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */ +int do_posix = FALSE; /* turn off gnu and unix extensions */ +int do_lint = FALSE; /* provide warnings about questionable stuff */ +int do_lint_old = FALSE; /* warn about stuff not in V7 awk */ +int do_nostalgia = FALSE; /* provide a blast from the past */ +int do_intervals = FALSE; /* allow {...,...} in regexps */ + +int in_begin_rule = FALSE; /* we're in a BEGIN rule */ +int in_end_rule = FALSE; /* we're in a END rule */ + +int output_is_tty = FALSE; /* control flushing of output */ + +extern char *version_string; /* current version, for printing */ + +/* The parse tree is stored here. */ +NODE *expression_value; + +static struct option optab[] = { + { "compat", no_argument, & do_traditional, 1 }, + { "traditional", no_argument, & do_traditional, 1 }, + { "lint", no_argument, & do_lint, 1 }, + { "lint-old", no_argument, & do_lint_old, 1 }, + { "posix", no_argument, & do_posix, 1 }, + { "nostalgia", no_argument, & do_nostalgia, 1 }, + { "copyleft", no_argument, NULL, 'C' }, + { "copyright", no_argument, NULL, 'C' }, + { "field-separator", required_argument, NULL, 'F' }, + { "file", required_argument, NULL, 'f' }, + { "re-interval", no_argument, & do_intervals, 1 }, + { "source", required_argument, NULL, 's' }, + { "assign", required_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "usage", no_argument, NULL, 'u' }, + { "help", no_argument, NULL, 'u' }, +#ifdef DEBUG + { "parsedebug", no_argument, NULL, 'D' }, +#endif + { NULL, 0, NULL, '\0' } +}; + +/* main --- process args, parse program, run it, clean up */ + +int +main(argc, argv) +int argc; +char **argv; +{ + int c; + char *scan; + /* the + on the front tells GNU getopt not to rearrange 
argv */ + const char *optlist = "+F:f:v:W;m:"; + int stopped_early = FALSE; + int old_optind; + extern int optind; + extern int opterr; + extern char *optarg; + + setlocale(LC_CTYPE, ""); + setlocale(LC_COLLATE, ""); + + (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig); + (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig); +#ifdef SIGBUS + (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig); +#endif + + myname = gawk_name(argv[0]); + argv[0] = (char *) myname; + os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ + + /* remove sccs gunk */ + if (strncmp(version_string, "@(#)", 4) == 0) + version_string += 4; + + if (argc < 2) + usage(1, stderr); + + /* initialize the null string */ + Nnull_string = make_string("", 0); + Nnull_string->numbr = 0.0; + Nnull_string->type = Node_val; + Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER); + + /* + * Tell the regex routines how they should work. + * Do this before initializing variables, since + * they could want to do a regexp compile. + */ + resetup(); + + /* Set up the special variables */ + /* + * Note that this must be done BEFORE arg parsing else -F + * breaks horribly + */ + init_vars(); + + /* worst case */ + emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main"); + memset(srcfiles, '\0', argc * sizeof(struct src)); + + /* we do error messages ourselves on invalid options */ + opterr = FALSE; + + /* option processing. ready, set, go! */ + for (optopt = 0, old_optind = 1; + (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF; + optopt = 0, old_optind = optind) { + if (do_posix) + opterr = TRUE; + + switch (c) { + case 'F': + cmdline_fs(optarg); + break; + + case 'f': + /* + * a la MKS awk, allow multiple -f options. + * this makes function libraries real easy. + * most of the magic is in the scanner. + * + * The following is to allow for whitespace at the end + * of a #! 
/bin/gawk line in an executable file + */ + scan = optarg; + while (ISSPACE(*scan)) + scan++; + + ++numfiles; + srcfiles[numfiles].stype = SOURCEFILE; + if (*scan == '\0') + srcfiles[numfiles].val = argv[optind++]; + else + srcfiles[numfiles].val = optarg; + break; + + case 'v': + pre_assign(optarg); + break; + + case 'm': + /* + * Research awk extension. + * -mf nnn set # fields, gawk ignores + * -mr nnn set record length, ditto + */ + if (do_lint) + warning("-m[fr] option irrelevant in gawk"); + if (optarg[0] != 'r' && optarg[0] != 'f') + warning("-m option usage: `-m[fr] nnn'"); + if (optarg[1] == '\0') + optind++; + break; + + case 'W': /* gawk specific options - now in getopt_long */ + fprintf(stderr, "%s: option `-W %s' unrecognized, ignored\n", + argv[0], optarg); + break; + + /* These can only come from long form options */ + case 'C': + copyleft(); + break; + + case 's': + if (optarg[0] == '\0') + warning("empty argument to --source ignored"); + else { + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = optarg; + } + break; + + case 'u': + usage(0, stdout); /* per coding stds */ + break; + + case 'V': + version(); + break; + +#ifdef DEBUG + case 'D': + yydebug = 2; + break; +#endif + + case 0: + /* + * getopt_long found an option that sets a variable + * instead of returning a letter. Do nothing, just + * cycle around for the next one. + */ + break; + + case '?': + default: + /* + * New behavior. If not posix, an unrecognized + * option stops argument processing so that it can + * go into ARGV for the awk program to see. This + * makes use of ``#! /bin/gawk -f'' easier. + * + * However, it's never simple. If optopt is set, + * an option that requires an argument didn't get the + * argument. We care because if opterr is 0, then + * getopt_long won't print the error message for us. + */ + if (! do_posix + && (optopt == '\0' || strchr(optlist, optopt) == NULL)) { + /* + * can't just do optind--. 
In case of an + * option with >= 2 letters, getopt_long + * won't have incremented optind. + */ + optind = old_optind; + stopped_early = TRUE; + goto out; + } else if (optopt != '\0') + /* Use 1003.2 required message format */ + fprintf(stderr, + "%s: option requires an argument -- %c\n", + myname, optopt); + /* else + let getopt print error message for us */ + break; + } + } +out: + + if (do_nostalgia) + nostalgia(); + + /* check for POSIXLY_CORRECT environment variable */ + if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) { + do_posix = TRUE; + if (do_lint) + warning( + "environment variable `POSIXLY_CORRECT' set: turning on --posix"); + } + + if (do_posix) { + if (do_traditional) /* both on command line */ + warning("--posix overrides --traditional"); + else + do_traditional = TRUE; + /* + * POSIX compliance also implies + * no GNU extensions either. + */ + } + + /* + * Tell the regex routines how they should work. + * Do this again, after argument processing, since do_posix + * and do_traditional are now paid attention to by resetup(). + */ + if (do_traditional || do_posix || do_intervals) { + resetup(); + + /* now handle RS and FS. 
have to be careful with FS */ + set_RS(); + if (using_fieldwidths()) { + set_FS(); + set_FIELDWIDTHS(); + } else + set_FS(); + } + +#ifdef DEBUG + setbuf(stdout, (char *) NULL); /* make debugging easier */ +#endif + if (isatty(fileno(stdout))) + output_is_tty = TRUE; + /* No -f or --source options, use next arg */ + if (numfiles == -1) { + if (optind > argc - 1 || stopped_early) /* no args left or no program */ + usage(1, stderr); + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = argv[optind]; + optind++; + } + + init_args(optind, argc, (char *) myname, argv); + (void) tokexpand(); + + /* Read in the program */ + if (yyparse() != 0 || errcount != 0) + exit(1); + /* recover any space from C based alloca */ +#ifdef C_ALLOCA + (void) alloca(0); +#endif + + /* Set up the field variables */ + init_fields(); + + if (do_lint && begin_block == NULL && expression_value == NULL + && end_block == NULL) + warning("no program"); + + if (begin_block != NULL) { + in_begin_rule = TRUE; + (void) interpret(begin_block); + } + in_begin_rule = FALSE; + if (! exiting && (expression_value != NULL || end_block != NULL)) + do_input(); + if (end_block != NULL) { + in_end_rule = TRUE; + (void) interpret(end_block); + } + in_end_rule = FALSE; + if (close_io() != 0 && exit_val == 0) + exit_val = 1; + exit(exit_val); /* more portable */ + return exit_val; /* to suppress warnings */ +} + +/* usage --- print usage information and exit */ + +static void +usage(exitval, fp) +int exitval; +FILE *fp; +{ + char *opt1 = " -f progfile [--]"; + char *regops = " [POSIX or GNU style options]"; + + fprintf(fp, "Usage: %s%s%s file ...\n\t%s%s [--] %cprogram%c file ...\n", + myname, regops, opt1, myname, regops, quote, quote); + + /* GNU long options info. Gack. 
*/ + fputs("POSIX options:\t\tGNU long options:\n", fp); + fputs("\t-f progfile\t\t--file=progfile\n", fp); + fputs("\t-F fs\t\t\t--field-separator=fs\n", fp); + fputs("\t-v var=val\t\t--assign=var=val\n", fp); + fputs("\t-m[fr] val\n", fp); + fputs("\t-W compat\t\t--compat\n", fp); + fputs("\t-W copyleft\t\t--copyleft\n", fp); + fputs("\t-W copyright\t\t--copyright\n", fp); + fputs("\t-W help\t\t\t--help\n", fp); + fputs("\t-W lint\t\t\t--lint\n", fp); + fputs("\t-W lint-old\t\t--lint-old\n", fp); +#ifdef NOSTALGIA + fputs("\t-W nostalgia\t\t--nostalgia\n", fp); +#endif +#ifdef DEBUG + fputs("\t-W parsedebug\t\t--parsedebug\n", fp); +#endif + fputs("\t-W posix\t\t--posix\n", fp); + fputs("\t-W re-interval\t\t--re-interval\n", fp); + fputs("\t-W source=program-text\t--source=program-text\n", fp); + fputs("\t-W traditional\t\t--traditional\n", fp); + fputs("\t-W usage\t\t--usage\n", fp); + fputs("\t-W version\t\t--version\n", fp); + fputs("\nReport bugs to bug-gnu-utils@prep.ai.mit.edu,\n", fp); + fputs("with a Cc: to arnold@gnu.ai.mit.edu\n", fp); + exit(exitval); +} + +/* copyleft --- print out the short GNU copyright information */ + +static void +copyleft() +{ + static char blurb_part1[] = +"Copyright (C) 1989, 1991-1997 Free Software Foundation.\n\ +\n\ +This program is free software; you can redistribute it and/or modify\n\ +it under the terms of the GNU General Public License as published by\n\ +the Free Software Foundation; either version 2 of the License, or\n\ +(at your option) any later version.\n\ +\n"; + static char blurb_part2[] = +"This program is distributed in the hope that it will be useful,\n\ +but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n\ +GNU General Public License for more details.\n\ +\n"; + static char blurb_part3[] = +"You should have received a copy of the GNU General Public License\n\ +along with this program; if not, write to the Free Software\n\ +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"; + + /* multiple blurbs are needed for some brain dead compilers. */ + fputs(blurb_part1, stdout); + fputs(blurb_part2, stdout); + fputs(blurb_part3, stdout); + fflush(stdout); + exit(0); +} + +/* cmdline_fs --- set FS from the command line */ + +static void +cmdline_fs(str) +char *str; +{ + register NODE **tmp; + + tmp = get_lhs(FS_node, (Func_ptr *) 0); + unref(*tmp); + /* + * Only if in full compatibility mode check for the stupid special + * case so -F\t works as documented in awk book even though the shell + * hands us -Ft. Bleah! + * + * Thankfully, Posix didn't propogate this "feature". + */ + if (str[0] == 't' && str[1] == '\0') { + if (do_lint) + warning("-Ft does not set FS to tab in POSIX awk"); + if (do_traditional && ! do_posix) + str[0] = '\t'; + } + *tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */ + set_FS(); +} + +/* init_args --- set up ARGV from stuff on the command line */ + +static void +init_args(argc0, argc, argv0, argv) +int argc0, argc; +char *argv0; +char **argv; +{ + int i, j; + NODE **aptr; + + ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL)); + aptr = assoc_lookup(ARGV_node, tmp_number(0.0)); + *aptr = make_string(argv0, strlen(argv0)); + (*aptr)->flags |= MAYBE_NUM; + for (i = argc0, j = 1; i < argc; i++) { + aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j)); + *aptr = make_string(argv[i], strlen(argv[i])); + (*aptr)->flags |= MAYBE_NUM; + j++; + } + ARGC_node = install("ARGC", + node(make_number((AWKNUM) j), Node_var, (NODE *) NULL)); +} + +/* + * Set all the special variables to their initial values. 
+ * Note that some of the variables that have set_FOO routines should + * *N*O*T* have those routines called upon initialization, and thus + * they have NULL entries in that field. This is notably true of FS + * and IGNORECASE. + */ +struct varinit { + NODE **spec; + const char *name; + NODETYPE type; + const char *strval; + AWKNUM numval; + Func_ptr assign; +}; +static struct varinit varinit[] = { +{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT }, +{&NF_node, "NF", Node_NF, NULL, -1, set_NF }, +{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL }, +{&NR_node, "NR", Node_NR, NULL, 0, set_NR }, +{&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR }, +{&FS_node, "FS", Node_FS, " ", 0, NULL }, +{&RS_node, "RS", Node_RS, "\n", 0, set_RS }, +{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL }, +{&FILENAME_node, "FILENAME", Node_var, "", 0, NULL }, +{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS }, +{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS }, +{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT }, +{&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL }, +{&RSTART_node, "RSTART", Node_var, NULL, 0, NULL }, +{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL }, +{&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL }, +{&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL }, +{&RT_node, "RT", Node_var, "", 0, NULL }, +{0, NULL, Node_illegal, NULL, 0, NULL }, +}; + +/* init_vars --- actually initialize everything in the symbol table */ + +static void +init_vars() +{ + register struct varinit *vp; + + for (vp = varinit; vp->name; vp++) { + *(vp->spec) = install((char *) vp->name, + node(vp->strval == NULL ? make_number(vp->numval) + : make_string((char *) vp->strval, + strlen(vp->strval)), + vp->type, (NODE *) NULL)); + (*(vp->spec))->flags |= SCALAR; + if (vp->assign) + (*(vp->assign))(); + } +} + +/* load_environ --- populate the ENVIRON array */ + +void +load_environ() +{ +#if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! 
(defined(VMS) && defined(__DECC)) + extern char **environ; +#endif + register char *var, *val, *cp; + NODE **aptr; + register int i; + + ENVIRON_node = install("ENVIRON", + node(Nnull_string, Node_var, (NODE *) NULL)); + for (i = 0; environ[i] != NULL; i++) { + static char nullstr[] = ""; + + var = environ[i]; + val = strchr(var, '='); + if (val != NULL) + *val++ = '\0'; + else + val = nullstr; + aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var))); + *aptr = make_string(val, strlen(val)); + (*aptr)->flags |= (MAYBE_NUM|SCALAR); + + /* restore '=' so that system() gets a valid environment */ + if (val != nullstr) + *--val = '='; + } + /* + * Put AWKPATH into ENVIRON if it's not there. + * This allows querying it from outside gawk. + */ + if ((cp = getenv("AWKPATH")) == NULL) { + aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7)); + *aptr = make_string(defpath, strlen(defpath)); + (*aptr)->flags |= SCALAR; + } +} + +/* arg_assign --- process a command-line assignment */ + +char * +arg_assign(arg) +char *arg; +{ + char *cp, *cp2; + int badvar; + Func_ptr after_assign = NULL; + NODE *var; + NODE *it; + NODE **lhs; + + cp = strchr(arg, '='); + if (cp != NULL) { + *cp++ = '\0'; + /* first check that the variable name has valid syntax */ + badvar = FALSE; + if (! isalpha(arg[0]) && arg[0] != '_') + badvar = TRUE; + else + for (cp2 = arg+1; *cp2; cp2++) + if (! isalnum(*cp2) && *cp2 != '_') { + badvar = TRUE; + break; + } + if (badvar) + fatal("illegal name `%s' in variable assignment", arg); + + /* + * Recent versions of nawk expand escapes inside assignments. + * This makes sense, so we do it too. 
+ */ + it = make_str_node(cp, strlen(cp), SCAN); + it->flags |= (MAYBE_NUM|SCALAR); + var = variable(arg, FALSE, Node_var); + lhs = get_lhs(var, &after_assign); + unref(*lhs); + *lhs = it; + if (after_assign != NULL) + (*after_assign)(); + *--cp = '='; /* restore original text of ARGV */ + } + return cp; +} + +/* pre_assign --- handle -v, print a message and die if a problem */ + +static void +pre_assign(v) +char *v; +{ + if (arg_assign(v) == NULL) { + fprintf(stderr, + "%s: `%s' argument to `-v' not in `var=value' form\n", + myname, v); + usage(1, stderr); + } +} + +/* catchsig --- catch signals */ + +RETSIGTYPE +catchsig(sig, code) +int sig, code; +{ +#ifdef lint + code = 0; sig = code; code = sig; +#endif + if (sig == SIGFPE) { + fatal("floating point exception"); + } else if (sig == SIGSEGV +#ifdef SIGBUS + || sig == SIGBUS +#endif + ) { + set_loc(__FILE__, __LINE__); + msg("fatal error: internal error"); + /* fatal won't abort() if not compiled for debugging */ + abort(); + } else + cant_happen(); + /* NOTREACHED */ +} + +/* nostalgia --- print the famous error message and die */ + +static void +nostalgia() +{ + fprintf(stderr, "awk: bailing out near line 1\n"); + abort(); +} + +/* version --- print version message */ + +static void +version() +{ + printf("%s.%d\n", version_string, PATCHLEVEL); + /* + * Per GNU coding standards, print copyright info, + * then exit successfully, do nothing else. + */ + copyleft(); + exit(0); +} diff --git a/contrib/awk/missing.c b/contrib/awk/missing.c new file mode 100644 index 0000000..7494d76 --- /dev/null +++ b/contrib/awk/missing.c @@ -0,0 +1,59 @@ +/* + * Do all necessary includes here, so that we don't have to worry about + * overlapping includes in the files in missing.d. 
+ */ +#include "awk.h" + + +#ifdef atarist +/* + * this will work with gcc compiler - for other compilers you may + * have to replace path separators in this file into backslashes + */ +#include "atari/stack.c" +#include "atari/tmpnam.c" +#endif /* atarist */ + +#ifndef HAVE_SYSTEM +#ifdef atarist +#include "atari/system.c" +#else +#include "missing/system.c" +#endif +#endif /* HAVE_SYSTEM */ + +#ifndef HAVE_MEMCMP +#include "missing/memcmp.c" +#endif /* HAVE_MEMCMP */ + +#ifndef HAVE_MEMCPY +#include "missing/memcpy.c" +#endif /* HAVE_MEMCPY */ + +#ifndef HAVE_MEMSET +#include "missing/memset.c" +#endif /* HAVE_MEMSET */ + +#ifndef HAVE_STRNCASECMP +#include "missing/strncasecmp.c" +#endif /* HAVE_STRCASE */ + +#ifndef HAVE_STRERROR +#include "missing/strerror.c" +#endif /* HAVE_STRERROR */ + +#ifndef HAVE_STRFTIME +#include "missing/strftime.c" +#endif /* HAVE_STRFTIME */ + +#ifndef HAVE_STRCHR +#include "missing/strchr.c" +#endif /* HAVE_STRCHR */ + +#ifndef HAVE_STRTOD +#include "missing/strtod.c" +#endif /* HAVE_STRTOD */ + +#ifndef HAVE_TZSET +#include "missing/tzset.c" +#endif /* HAVE_TZSET */ diff --git a/contrib/awk/msg.c b/contrib/awk/msg.c new file mode 100644 index 0000000..82fa422 --- /dev/null +++ b/contrib/awk/msg.c @@ -0,0 +1,189 @@ +/* + * msg.c - routines for error messages + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" + +int sourceline = 0; +char *source = NULL; + +static char *srcfile = NULL; +static int srcline; + +/* prototype needed for ansi / gcc */ +void err P((const char *s, const char *emsg, va_list argp)); + +/* err --- print an error message with source line and file and record */ + +/* VARARGS2 */ +void +err(s, emsg, argp) +const char *s; +const char *emsg; +va_list argp; +{ + char *file; + + (void) fflush(stdout); + (void) fprintf(stderr, "%s: ", myname); +#ifdef DEBUG + if (srcfile != NULL) { + fprintf(stderr, "%s:%d:", srcfile, srcline); + srcfile = NULL; + } +#endif /* DEBUG */ + if (sourceline != 0) { + if (source != NULL) + (void) fprintf(stderr, "%s:", source); + else + (void) fprintf(stderr, "cmd. line:"); + + (void) fprintf(stderr, "%d: ", sourceline); + } + if (FNR > 0) { + file = FILENAME_node->var_value->stptr; + (void) putc('(', stderr); + if (file) + (void) fprintf(stderr, "FILENAME=%s ", file); + (void) fprintf(stderr, "FNR=%ld) ", FNR); + } + (void) fprintf(stderr, s); + vfprintf(stderr, emsg, argp); + (void) fprintf(stderr, "\n"); + (void) fflush(stderr); +} + +/* msg --- take a varargs error message and print it */ + +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +void +msg(char *mesg, ...) +#else +/*VARARGS0*/ +void +msg(va_alist) +va_dcl +#endif +{ + va_list args; +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ + va_start(args, mesg); +#else + char *mesg; + + va_start(args); + mesg = va_arg(args, char *); +#endif + err("", mesg, args); + va_end(args); +} + +/* warning --- print a warning message */ + +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +void +warning(char *mesg, ...) 
+#else +/*VARARGS0*/ +void +warning(va_alist) +va_dcl +#endif +{ + va_list args; +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ + va_start(args, mesg); +#else + char *mesg; + + va_start(args); + mesg = va_arg(args, char *); +#endif + err("warning: ", mesg, args); + va_end(args); +} + +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +void +error(char *mesg, ...) +#else +/*VARARGS0*/ +void +error(va_alist) +va_dcl +#endif +{ + va_list args; +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ + va_start(args, mesg); +#else + char *mesg; + + va_start(args); + mesg = va_arg(args, char *); +#endif + err("error: ", mesg, args); + va_end(args); +} + +/* set_loc --- set location where a fatal error happened */ + +void +set_loc(file, line) +char *file; +int line; +{ + srcfile = file; + srcline = line; +} + +/* fatal --- print an error message and die */ + +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ +void +r_fatal(char *mesg, ...) +#else +/*VARARGS0*/ +void +r_fatal(va_alist) +va_dcl +#endif +{ + va_list args; +#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ + va_start(args, mesg); +#else + char *mesg; + + va_start(args); + mesg = va_arg(args, char *); +#endif + err("fatal: ", mesg, args); + va_end(args); +#ifdef DEBUG + abort(); +#endif + exit(2); +} + diff --git a/contrib/awk/node.c b/contrib/awk/node.c new file mode 100644 index 0000000..6f10b9f --- /dev/null +++ b/contrib/awk/node.c @@ -0,0 +1,515 @@ +/* + * node.c -- routines for node management + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
/* r_force_number --- force a value to be numeric */

/*
 * Convert the string value of node N to a number, cache it in n->numbr,
 * set the NUM flag, and return it.  Non-numeric strings yield 0.0.
 * If the string came from input and might be a number (MAYBE_NUM), the
 * NUMBER flag is additionally set when the whole string converts.
 */
AWKNUM
r_force_number(n)
register NODE *n;
{
	register char *cp;
	register char *cpend;
	char save;
	char *ptr;
	unsigned int newflags;

#ifdef DEBUG
	/*
	 * In non-DEBUG builds these checks (and the NUM early return) are
	 * presumably done by the force_number() macro before calling here —
	 * TODO confirm against awk.h.
	 */
	if (n == NULL)
		cant_happen();
	if (n->type != Node_val)
		cant_happen();
	if(n->flags == 0)
		cant_happen();
	if (n->flags & NUM)
		return n->numbr;
#endif

	/* all the conditionals are an attempt to avoid the expensive strtod */

	n->numbr = 0.0;
	n->flags |= NUM;

	if (n->stlen == 0)
		return 0.0;

	cp = n->stptr;
	if (ISALPHA(*cp))
		return 0.0;

	/* skip leading whitespace; an all-space string is not a number */
	cpend = cp + n->stlen;
	while (cp < cpend && isspace(*cp))
		cp++;
	if (cp == cpend || isalpha(*cp))
		return 0.0;

	if (n->flags & MAYBE_NUM) {
		newflags = NUMBER;
		n->flags &= ~MAYBE_NUM;
	} else
		newflags = 0;
	/* single character: a digit can be converted without strtod */
	if (cpend - cp == 1) {
		if (ISDIGIT(*cp)) {
			n->numbr = (AWKNUM)(*cp - '0');
			n->flags |= newflags;
		}
		return n->numbr;
	}

#ifdef NONDECDATA
	/* non-decimal (octal/hex) input data, a gawk extension */
	errno = 0;
	if (! do_traditional && isnondecimal(cp)) {
		n->numbr = nondec2awknum(cp, cpend - cp);
		/*
		 * NOTE(review): on this path PTR is never assigned, so the
		 * "ptr >= cpend" test at finish reads an uninitialized
		 * pointer — looks like a latent bug; confirm upstream.
		 */
		goto finish;
	}
#endif /* NONDECDATA */

	/* temporarily NUL-terminate so strtod sees a proper C string */
	errno = 0;
	save = *cpend;
	*cpend = '\0';
	n->numbr = (AWKNUM) strtod((const char *) cp, &ptr);

	/* POSIX says trailing space is OK for NUMBER */
	while (ISSPACE(*ptr))
		ptr++;
	*cpend = save;
finish:
	/* the >= should be ==, but for SunOS 3.5 strtod() */
	if (errno == 0 && ptr >= cpend)
		n->flags |= newflags;
	else
		errno = 0;	/* reset after a failed/partial conversion */

	return n->numbr;
}
/* r_force_string --- force a value to be a string */

/*
 * Give the numeric node S a string representation, formatted with the
 * current CONVFMT, and return S.
 */
NODE *
r_force_string(s)
register NODE *s;
{
#ifdef DEBUG
	/*
	 * In non-DEBUG builds these sanity checks and the cached-string
	 * early return are presumably performed by the force_string()
	 * macro before calling here — TODO confirm against awk.h.
	 */
	if (s == NULL)
		cant_happen();
	if (s->type != Node_val)
		cant_happen();
	if ((s->flags & NUM) == 0)
		cant_happen();
	if (s->stref <= 0)
		cant_happen();
	/* already have a string in the current format: reuse it */
	if ((s->flags & STR) != 0
	    && (s->stfmt == -1 || s->stfmt == CONVFMTidx))
		return s;
#endif

	return format_val(CONVFMT, CONVFMTidx, s);
}
+ */ + +NODE * +dupnode(n) +NODE *n; +{ + register NODE *r; + + if ((n->flags & TEMP) != 0) { + n->flags &= ~TEMP; + n->flags |= MALLOC; + return n; + } + if ((n->flags & (MALLOC|STR)) == (MALLOC|STR)) { + if (n->stref < LONG_MAX) + n->stref++; + return n; + } + getnode(r); + *r = *n; + r->flags &= ~(PERM|TEMP); + r->flags |= MALLOC; + if (n->type == Node_val && (n->flags & STR) != 0) { + r->stref = 1; + emalloc(r->stptr, char *, r->stlen + 2, "dupnode"); + memcpy(r->stptr, n->stptr, r->stlen); + r->stptr[r->stlen] = '\0'; + } + return r; +} + +/* mk_number --- allocate a node with defined number */ + +NODE * +mk_number(x, flags) +AWKNUM x; +unsigned int flags; +{ + register NODE *r; + + getnode(r); + r->type = Node_val; + r->numbr = x; + r->flags = flags | SCALAR; +#ifdef DEBUG + r->stref = 1; + r->stptr = NULL; + r->stlen = 0; +#endif + return r; +} + +/* make_str_node --- make a string node */ + +NODE * +make_str_node(s, len, flags) +char *s; +size_t len; +int flags; +{ + register NODE *r; + + getnode(r); + r->type = Node_val; + r->flags = (STRING|STR|MALLOC|SCALAR); + if (flags & ALREADY_MALLOCED) + r->stptr = s; + else { + emalloc(r->stptr, char *, len + 2, s); + memcpy(r->stptr, s, len); + } + r->stptr[len] = '\0'; + + if ((flags & SCAN) != 0) { /* scan for escape sequences */ + char *pf; + register char *ptm; + register int c; + register char *end; + + end = &(r->stptr[len]); + for (pf = ptm = r->stptr; pf < end;) { + c = *pf++; + if (c == '\\') { + c = parse_escape(&pf); + if (c < 0) { + if (do_lint) + warning("backslash at end of string"); + c = '\\'; + } + *ptm++ = c; + } else + *ptm++ = c; + } + len = ptm - r->stptr; + erealloc(r->stptr, char *, len + 1, "make_str_node"); + r->stptr[len] = '\0'; + r->flags |= PERM; + } + r->stlen = len; + r->stref = 1; + r->stfmt = -1; + + return r; +} + +/* tmp_string --- allocate a temporary string */ + +NODE * +tmp_string(s, len) +char *s; +size_t len; +{ + register NODE *r; + + r = make_string(s, len); + r->flags |= 
/* more_nodes --- allocate more nodes */

/* number of NODEs grabbed from the system per allocation */
#define NODECHUNK	100

/* head of the free list of NODEs */
NODE *nextfree = NULL;

/*
 * Grab a fresh chunk of NODEs, chain all but the first onto the free
 * list, and return the first one.  Chunks are never returned to the
 * system; nodes recycle through the free list instead.
 */
NODE *
more_nodes()
{
	register NODE *np;

	/* get more nodes and initialize list */
	emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode");
	for (np = nextfree; np <= &nextfree[NODECHUNK - 1]; np++) {
		np->flags = 0;
		np->nextp = np + 1;
	}
	--np;
	np->nextp = NULL;	/* terminate the new chain */
	np = nextfree;
	nextfree = nextfree->nextp;	/* hand out the first node */
	return np;
}

#ifdef DEBUG
/* freenode --- release a node back to the pool */

/*
 * Compiled as a function only under DEBUG; presumably a macro version
 * exists for normal builds — TODO confirm against awk.h.
 */
void
freenode(it)
NODE *it;
{
	it->flags &= ~SCALAR;
#ifdef MPROF
	/* memory profiling: really free, so the profiler sees it */
	it->stref = 0;
	free((char *) it);
#else	/* not MPROF */
	/* add it to head of freelist */
	it->nextp = nextfree;
	nextfree = it;
#endif	/* not MPROF */
}
#endif	/* DEBUG */

/* unref --- remove reference to a particular node */

/*
 * Drop one reference to TMP; free its string storage and recycle the
 * node when the last reference goes away.  PERM nodes are never freed.
 */
void
unref(tmp)
register NODE *tmp;
{
	if (tmp == NULL)
		return;
	if ((tmp->flags & PERM) != 0)
		return;
	if ((tmp->flags & (MALLOC|TEMP)) != 0) {
		tmp->flags &= ~TEMP;
		if ((tmp->flags & STR) != 0) {
			if (tmp->stref > 1) {
				/* still shared; saturated counts stay put */
				if (tmp->stref != LONG_MAX)
					tmp->stref--;
				return;
			}
			free(tmp->stptr);
		}
		freenode(tmp);
		return;
	}
	if ((tmp->flags & FIELD) != 0) {
		/* field nodes don't own stptr (points into the record) */
		freenode(tmp);
		return;
	}
}
+ */ + +int +parse_escape(string_ptr) +char **string_ptr; +{ + register int c = *(*string_ptr)++; + register int i; + register int count; + + switch (c) { + case 'a': + return BELL; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\n': + return -2; + case 0: + (*string_ptr)--; + return -1; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + i = c - '0'; + count = 0; + while (++count < 3) { + if ((c = *(*string_ptr)++) >= '0' && c <= '7') { + i *= 8; + i += c - '0'; + } else { + (*string_ptr)--; + break; + } + } + return i; + case 'x': + if (do_lint) { + static int didwarn = FALSE; + + if (! didwarn) { + didwarn = TRUE; + warning("POSIX does not allow \"\\x\" escapes"); + } + } + if (do_posix) + return ('x'); + if (! isxdigit((*string_ptr)[0])) { + warning("no hex digits in \\x escape sequence"); + return ('x'); + } + i = 0; + for (;;) { + if (ISXDIGIT((c = *(*string_ptr)++))) { + i *= 16; + if (ISDIGIT(c)) + i += c - '0'; + else if (ISUPPER(c)) + i += c - 'A' + 10; + else + i += c - 'a' + 10; + } else { + (*string_ptr)--; + break; + } + } + return i; + default: + return c; + } +} diff --git a/contrib/awk/patchlevel.h b/contrib/awk/patchlevel.h new file mode 100644 index 0000000..f360824 --- /dev/null +++ b/contrib/awk/patchlevel.h @@ -0,0 +1 @@ +#define PATCHLEVEL 3 diff --git a/contrib/awk/posix/ChangeLog b/contrib/awk/posix/ChangeLog new file mode 100644 index 0000000..575baa1 --- /dev/null +++ b/contrib/awk/posix/ChangeLog @@ -0,0 +1,19 @@ +Thu May 15 12:49:08 1997 Arnold D. Robbins + + * Release 3.0.3: Release tar file made. + +Fri Apr 18 07:55:47 1997 Arnold D. Robbins + + * BETA Release 3.0.34: Release tar file made. + +Wed Dec 25 11:25:22 1996 Arnold D. Robbins + + * Release 3.0.2: Release tar file made. + +Tue Dec 10 23:09:26 1996 Arnold D. Robbins + + * Release 3.0.1: Release tar file made. 
/* gawk_name --- pull out the "gawk" part from how the OS called us */

/*
 * Return a pointer to the basename component of FILESPEC, i.e. the text
 * following the last '/', or FILESPEC itself if it has no directory part.
 * No copy is made; the result points into the caller's string.
 */
char *
gawk_name(filespec)
const char *filespec;
{
	const char *base;

	/* "path/name" -> "name" */
	base = strrchr(filespec, '/');
	if (base != NULL)
		return (char *) (base + 1);
	return (char *) filespec;
}
/* ispath --- return true if path has directory components */

/*
 * Nonzero iff FILE contains at least one '/' separator.
 */
int
ispath(file)
const char *file;
{
	if (strchr(file, '/') == NULL)
		return 0;
	return 1;
}

/* isdirpunct --- return true if char is a directory separator */

/*
 * Nonzero iff C is the (sole, on POSIX systems) path separator '/'.
 */
int
isdirpunct(c)
int c;
{
	return c == '/' ? 1 : 0;
}
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)random.c 5.5 (Berkeley) 7/6/88"; +#endif /* LIBC_SCCS and not lint */ + +#include +#include "random.h" /* GAWK ADDITION */ + +/* + * random.c: + * An improved random number generation package. In addition to the standard + * rand()/srand() like interface, this package also has a special state info + * interface. The initstate() routine is called with a seed, an array of + * bytes, and a count of how many bytes are being passed in; this array is then + * initialized to contain information for random number generation with that + * much state information. Good sizes for the amount of state information are + * 32, 64, 128, and 256 bytes. The state can be switched by calling the + * setstate() routine with the same array as was initiallized with initstate(). + * By default, the package runs with 128 bytes of state information and + * generates far better random numbers than a linear congruential generator. + * If the amount of state information is less than 32 bytes, a simple linear + * congruential R.N.G. is used. + * Internally, the state information is treated as an array of longs; the + * zeroeth element of the array is the type of R.N.G. being used (small + * integer); the remainder of the array is the state information for the + * R.N.G. Thus, 32 bytes of state information will give 7 longs worth of + * state information, which will allow a degree seven polynomial. (Note: the + * zeroeth word of state information also has some other information stored + * in it -- see setstate() for details). + * The random number generation technique is a linear feedback shift register + * approach, employing trinomials (since there are fewer terms to sum up that + * way). 
In this approach, the least significant bit of all the numbers in + * the state table will act as a linear feedback shift register, and will have + * period 2^deg - 1 (where deg is the degree of the polynomial being used, + * assuming that the polynomial is irreducible and primitive). The higher + * order bits will have longer periods, since their values are also influenced + * by pseudo-random carries out of the lower bits. The total period of the + * generator is approximately deg*(2**deg - 1); thus doubling the amount of + * state information has a vast influence on the period of the generator. + * Note: the deg*(2**deg - 1) is an approximation only good for large deg, + * when the period of the shift register is the dominant factor. With deg + * equal to seven, the period is actually much longer than the 7*(2**7 - 1) + * predicted by this formula. + */ + + + +/* + * For each of the currently supported random number generators, we have a + * break value on the amount of state information (you need at least this + * many bytes of state info to support this random number generator), a degree + * for the polynomial (actually a trinomial) that the R.N.G. is based on, and + * the separation between the two lower order coefficients of the trinomial. + */ + +#define TYPE_0 0 /* linear congruential */ +#define BREAK_0 8 +#define DEG_0 0 +#define SEP_0 0 + +#define TYPE_1 1 /* x**7 + x**3 + 1 */ +#define BREAK_1 32 +#define DEG_1 7 +#define SEP_1 3 + +#define TYPE_2 2 /* x**15 + x + 1 */ +#define BREAK_2 64 +#define DEG_2 15 +#define SEP_2 1 + +#define TYPE_3 3 /* x**31 + x**3 + 1 */ +#define BREAK_3 128 +#define DEG_3 31 +#define SEP_3 3 +#ifdef _CRAY +#define DEG_3_P1 32 /* bug - do addition here */ +#define SEP_3_P1 4 /* *_3 + 1 = _3_P1 */ +#endif + +#define TYPE_4 4 /* x**63 + x + 1 */ +#define BREAK_4 256 +#define DEG_4 63 +#define SEP_4 1 + + +/* + * Array versions of the above information to make code run faster -- relies + * on fact that TYPE_i == i. 
+ */ + +#define MAX_TYPES 5 /* max number of types above */ + +static int degrees[ MAX_TYPES ] = { DEG_0, DEG_1, DEG_2, + DEG_3, DEG_4 }; + +static int seps[ MAX_TYPES ] = { SEP_0, SEP_1, SEP_2, + SEP_3, SEP_4 }; + + + +/* + * Initially, everything is set up as if from : + * initstate( 1, &randtbl, 128 ); + * Note that this initialization takes advantage of the fact that srandom() + * advances the front and rear pointers 10*rand_deg times, and hence the + * rear pointer which starts at 0 will also end up at zero; thus the zeroeth + * element of the state information, which contains info about the current + * position of the rear pointer is just + * MAX_TYPES*(rptr - state) + TYPE_3 == TYPE_3. + */ + +static long randtbl[ DEG_3 + 1 ] = { TYPE_3, + 0x9a319039, 0x32d9c024, 0x9b663182, 0x5da1f342, + 0xde3b81e0, 0xdf0a6fb5, 0xf103bc02, 0x48f340fb, + 0x7449e56b, 0xbeb1dbb0, 0xab5c5918, 0x946554fd, + 0x8c2e680f, 0xeb3d799f, 0xb11ee0b7, 0x2d436b86, + 0xda672e2a, 0x1588ca88, 0xe369735d, 0x904f35f7, + 0xd7158fd6, 0x6fa6f051, 0x616e6b96, 0xac94efdc, + 0x36413f93, 0xc622c298, 0xf5a42ab8, 0x8a88d77b, + 0xf5ad9d0e, 0x8999220b, 0x27fb47b9 }; + +/* + * fptr and rptr are two pointers into the state info, a front and a rear + * pointer. These two pointers are always rand_sep places aparts, as they cycle + * cyclically through the state information. (Yes, this does mean we could get + * away with just one pointer, but the code for random() is more efficient this + * way). The pointers are left positioned as they would be from the call + * initstate( 1, randtbl, 128 ) + * (The position of the rear pointer, rptr, is really 0 (as explained above + * in the initialization of randtbl) because the state table pointer is set + * to point to randtbl[1] (as explained below). 
+ */ + +#ifdef _CRAY +static long *fptr = &randtbl[ SEP_3_P1 ]; +#else +static long *fptr = &randtbl[ SEP_3 + 1 ]; +#endif +static long *rptr = &randtbl[ 1 ]; + + + +/* + * The following things are the pointer to the state information table, + * the type of the current generator, the degree of the current polynomial + * being used, and the separation between the two pointers. + * Note that for efficiency of random(), we remember the first location of + * the state information, not the zeroeth. Hence it is valid to access + * state[-1], which is used to store the type of the R.N.G. + * Also, we remember the last location, since this is more efficient than + * indexing every time to find the address of the last element to see if + * the front and rear pointers have wrapped. + */ + +static long *state = &randtbl[ 1 ]; + +static int rand_type = TYPE_3; +static int rand_deg = DEG_3; +static int rand_sep = SEP_3; + +#ifdef _CRAY +static long *end_ptr = &randtbl[ DEG_3_P1 ]; +#else +static long *end_ptr = &randtbl[ DEG_3 + 1 ]; +#endif + + + +/* + * srandom: + * Initialize the random number generator based on the given seed. If the + * type is the trivial no-state-information type, just remember the seed. + * Otherwise, initializes state[] based on the given "seed" via a linear + * congruential generator. Then, the pointers are set to known locations + * that are exactly rand_sep places apart. Lastly, it cycles the state + * information a given number of times to get rid of any initial dependencies + * introduced by the L.C.R.N.G. + * Note that the initialization of randtbl[] for default usage relies on + * values produced by this routine. 
/*
 * srandom:
 * Seed the generator.  For the trivial TYPE_0 generator just store the
 * seed; otherwise fill state[] from the seed with an L.C.R.N.G., reset
 * the front/rear pointers exactly rand_sep apart, then cycle the
 * generator 10*rand_deg times to shake out initial dependencies.
 */
void
srandom( x )

	unsigned x;
{
	register int i, j;
	long random();	/* old-style forward declaration */

	if( rand_type == TYPE_0 ) {
		state[ 0 ] = x;
	}
	else {
		j = 1;	/* NOTE(review): j is set but never used; historical */
		state[ 0 ] = x;
		/* linear congruential fill of the state table */
		for( i = 1; i < rand_deg; i++ ) {
			state[i] = 1103515245*state[i - 1] + 12345;
		}
		fptr = &state[ rand_sep ];
		rptr = &state[ 0 ];
		/* churn: 10*rand_deg calls returns rptr to offset 0 */
		for( i = 0; i < 10*rand_deg; i++ ) random();
	}
}
*/ + char *arg_state; /* pointer to state array */ + int n; /* # bytes of state info */ +{ + register char *ostate = (char *)( &state[ -1 ] ); + + if( rand_type == TYPE_0 ) state[ -1 ] = rand_type; + else state[ -1 ] = MAX_TYPES*(rptr - state) + rand_type; + if( n < BREAK_1 ) { + if( n < BREAK_0 ) { + fprintf( stderr, "initstate: not enough state (%d bytes) with which to do jack; ignored.\n", n ); + return 0; + } + rand_type = TYPE_0; + rand_deg = DEG_0; + rand_sep = SEP_0; + } + else { + if( n < BREAK_2 ) { + rand_type = TYPE_1; + rand_deg = DEG_1; + rand_sep = SEP_1; + } + else { + if( n < BREAK_3 ) { + rand_type = TYPE_2; + rand_deg = DEG_2; + rand_sep = SEP_2; + } + else { + if( n < BREAK_4 ) { + rand_type = TYPE_3; + rand_deg = DEG_3; + rand_sep = SEP_3; + } + else { + rand_type = TYPE_4; + rand_deg = DEG_4; + rand_sep = SEP_4; + } + } + } + } + state = &( ( (long *)arg_state )[1] ); /* first location */ + end_ptr = &state[ rand_deg ]; /* must set end_ptr before srandom */ + srandom( seed ); + if( rand_type == TYPE_0 ) state[ -1 ] = rand_type; + else state[ -1 ] = MAX_TYPES*(rptr - state) + rand_type; + return( ostate ); +} + + + +/* + * setstate: + * Restore the state from the given state array. + * Note: it is important that we also remember the locations of the pointers + * in the current state information, and restore the locations of the pointers + * from the old state information. This is done by multiplexing the pointer + * location into the zeroeth word of the state information. + * Note that due to the order in which things are done, it is OK to call + * setstate() with the same state as the current state. + * Returns a pointer to the old state information. 
/*
 * setstate:
 * Restore the generator state from ARG_STATE, a buffer previously
 * handed out by initstate()/setstate().  Word 0 multiplexes the
 * generator type with the rear-pointer offset (see initstate()).
 * Returns a pointer to the previous state buffer.
 */
char *
setstate( arg_state )

	char *arg_state;
{
	register long *new_state = (long *)arg_state;
	register int type = new_state[0]%MAX_TYPES;
	register int rear = new_state[0]/MAX_TYPES;
	char *ostate = (char *)( &state[ -1 ] );

	/* stash the current rear-pointer position into the old buffer */
	if( rand_type == TYPE_0 ) state[ -1 ] = rand_type;
	else state[ -1 ] = MAX_TYPES*(rptr - state) + rand_type;
	switch( type ) {
	  case TYPE_0:
	  case TYPE_1:
	  case TYPE_2:
	  case TYPE_3:
	  case TYPE_4:
		rand_type = type;
		rand_deg = degrees[ type ];
		rand_sep = seps[ type ];
		break;

	  default:
		/* unrecognized type word: keep the current parameters */
		fprintf( stderr, "setstate: state info has been munged; not changed.\n" );
	}
	/* state points at word 1; word 0 (state[-1]) holds the type info */
	state = &new_state[ 1 ];
	if( rand_type != TYPE_0 ) {
		rptr = &state[ rear ];
		fptr = &state[ (rear + rand_sep)%rand_deg ];
	}
	end_ptr = &state[ rand_deg ];	/* set end_ptr too */
	return( ostate );
}
/*
 * random:
 * TYPE_0: plain linear congruential step.  Otherwise add the rear-
 * pointer word into the front-pointer word, advance both cyclically,
 * and return the sum shifted right one bit (dropping the "least
 * random" low bit), masked to 31 bits.
 */
long
random()
{
	long i;

	if( rand_type == TYPE_0 ) {
		i = state[0] = ( state[0]*1103515245 + 12345 )&0x7fffffff;
	}
	else {
		/*
		 * NOTE(review): the additions here rely on wraparound of
		 * signed longs — technically undefined in ISO C, though
		 * it is the historical BSD behavior this code assumes.
		 */
		*fptr += *rptr;
		i = (*fptr >> 1)&0x7fffffff;	/* chucking least random bit */
		/* the two pointers can never wrap on the same call */
		if( ++fptr >= end_ptr ) {
			fptr = state;
			++rptr;
		}
		else {
			if( ++rptr >= end_ptr ) rptr = state;
		}
	}
	return( i );
}
/* make_regexp --- generate compiled regular expressions */

/*
 * Compile the LEN-byte pattern S into a Regexp.  A translated copy of
 * the pattern is built first, with awk-level escape sequences resolved;
 * the GNU regex engine then compiles that copy.  With IGNORECASE the
 * case-folding translate table is installed; with DFA (and not
 * IGNORECASE) a dfa program is additionally compiled for fast matching.
 */
Regexp *
make_regexp(s, len, ignorecase, dfa)
char *s;
size_t len;
int ignorecase;
int dfa;
{
	Regexp *rp;
	const char *rerr;
	char *src = s;
	char *temp;
	char *end = s + len;
	register char *dest;
	register int c, c2;

	/* Handle escaped characters first. */

	/*
	 * Build a copy of the string (in dest) with the
	 * escaped characters translated, and generate the regex
	 * from that.
	 */
	emalloc(dest, char *, len + 2, "make_regexp");
	temp = dest;	/* remember the start; dest advances as we copy */

	while (src < end) {
		if (*src == '\\') {
			c = *++src;
			switch (c) {
			case 'a':
			case 'b':
			case 'f':
			case 'n':
			case 'r':
			case 't':
			case 'v':
			case 'x':
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				/* parse_escape advances src past the sequence */
				c2 = parse_escape(&src);
				if (c2 < 0)
					cant_happen();
				/*
				 * Unix awk treats octal (and hex?) chars
				 * literally in re's, so escape regexp
				 * metacharacters.
				 */
				if (do_traditional && ! do_posix && (isdigit(c) || c == 'x')
				    && strchr("()|*+?.^$\\[]", c2) != NULL)
					*dest++ = '\\';
				*dest++ = (char) c2;
				break;
			case '8':
			case '9':	/* a\9b not valid */
				*dest++ = c;
				src++;
				break;
			case 'y':	/* normally \b */
				/* gnu regex op */
				if (! do_traditional) {
					*dest++ = '\\';
					*dest++ = 'b';
					src++;
					break;
				}
				/* else, fall through */
			default:
				/* pass unknown escapes through to regex */
				*dest++ = '\\';
				*dest++ = (char) c;
				src++;
				break;
			} /* switch */
		} else
			*dest++ = *src++;	/* not '\\' */
	} /* for */

	*dest = '\0' ;	/* Only necessary if we print dest ? */
	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
	memset((char *) rp, 0, sizeof(*rp));
	rp->pat.allocated = 0;	/* regex will allocate the buffer */
	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");

	if (ignorecase)
		rp->pat.translate = casetable;
	else
		rp->pat.translate = NULL;
	/* LEN now becomes the length of the translated pattern */
	len = dest - temp;
	if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
		fatal("%s: /%s/", rerr, temp);

	/* gack. this must be done *after* re_compile_pattern */
	rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */
	if (dfa && ! ignorecase) {
		dfacomp(temp, len, &(rp->dfareg), TRUE);
		rp->dfa = TRUE;
	} else
		rp->dfa = FALSE;

	free(temp);	/* the translated copy is no longer needed */
	return rp;
}
/* refree --- free up the dynamic memory used by a compiled regexp */

void
refree(rp)
Regexp *rp;
{
	free(rp->pat.buffer);
	free(rp->pat.fastmap);
	if (rp->regs.start)
		free(rp->regs.start);
	if (rp->regs.end)
		free(rp->regs.end);
	if (rp->dfa)
		dfafree(&(rp->dfareg));
	free(rp);
}

/* dfaerror --- print an error message for the dfa routines */

/* Fatal-error callback required by the dfa matcher; never returns. */
void
dfaerror(s)
const char *s;
{
	fatal("%s", s);
}

/* re_update --- recompile a dynamic regexp */

/*
 * Return a compiled Regexp for node T, recompiling only when needed:
 * when IGNORECASE has changed (tracked via the CASE bits in re_flags)
 * or when a dynamic pattern's text has changed since last time.
 * re_cnt heuristically tracks how often the pattern changes, to decide
 * whether building a dfa program is worthwhile (reset after 10 uses).
 */
Regexp *
re_update(t)
NODE *t;
{
	NODE *t1;

/* # define CASE 1 */
	if ((t->re_flags & CASE) == IGNORECASE) {
		/* case-sensitivity unchanged since the last compile */
		if ((t->re_flags & CONST) != 0)
			return t->re_reg;	/* constant regexp: cached */
		t1 = force_string(tree_eval(t->re_exp));
		if (t->re_text != NULL) {
			if (cmp_nodes(t->re_text, t1) == 0) {
				/* same dynamic text: reuse the old compile */
				free_temp(t1);
				return t->re_reg;
			}
			unref(t->re_text);
		}
		t->re_text = dupnode(t1);
		free_temp(t1);
	}
	/* must recompile: throw away any previous compiled form */
	if (t->re_reg != NULL)
		refree(t->re_reg);
	if (t->re_cnt > 0)
		t->re_cnt++;
	if (t->re_cnt > 10)
		t->re_cnt = 0;
	if (t->re_text == NULL) {
		t1 = force_string(tree_eval(t->re_exp));
		t->re_text = dupnode(t1);
		free_temp(t1);
	}
	t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
				IGNORECASE, t->re_cnt);
	/* record the IGNORECASE setting this compile corresponds to */
	t->re_flags &= ~CASE;
	t->re_flags |= IGNORECASE;
	return t->re_reg;
}
RE_SYNTAX_GNU_AWK; /* POSIX re's + GNU ops */ + + /* + * Interval expressions are off by default, since it's likely to + * break too many old programs to have them on. + */ + if (do_intervals) + syn |= RE_INTERVALS; + + (void) re_set_syntax(syn); + dfasyntax(syn, FALSE); +} + +/* avoid_dfa --- FIXME: temporary kludge function until we have a new dfa.c */ + +int +avoid_dfa(re, str, len) +NODE *re; +char *str; +size_t len; +{ + char *restr; + int relen; + int anchor, i; + char *end; + + if ((re->re_flags & CONST) != 0) { + restr = re->re_exp->stptr; + relen = re->re_exp->stlen; + } else { + restr = re->re_text->stptr; + relen = re->re_text->stlen; + } + + for (anchor = FALSE, i = 0; i < relen; i++) { + if (restr[i] == '^' || restr[i] == '$') { + anchor = TRUE; + break; + } + } + if (! anchor) + return FALSE; + + for (end = str + len; str < end; str++) + if (*str == '\n') + return TRUE; + + return FALSE; +} diff --git a/contrib/awk/regex.h b/contrib/awk/regex.h new file mode 100644 index 0000000..5140052 --- /dev/null +++ b/contrib/awk/regex.h @@ -0,0 +1,531 @@ +/* Definitions for data structures and routines for the regular + expression library, version 0.12. + Copyright (C) 1985,89,90,91,92,93,95,96,97 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ + +#ifndef __REGEXP_LIBRARY_H__ +#define __REGEXP_LIBRARY_H__ + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* POSIX says that must be included (by the caller) before + . */ + +#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS) +/* VMS doesn't have `size_t' in , even though POSIX says it + should be there. */ +#include +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). 
+ If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. 
*/ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \ matches . + If not set, then \ is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. 
+ We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. 
*/ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS + replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +#undef RE_DUP_MAX +#endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +#define RE_DUP_MAX (0x7fff) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. 
*/ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. 
*/ + +#ifndef RE_TRANSLATE_TYPE +#define RE_TRANSLATE_TYPE char * +#endif + +struct re_pattern_buffer +{ +/* [[[begin pattern_buffer]]] */ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are + sometimes used as array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long int allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long int used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + is applied to a pattern when it is compiled and to a string + when it is matched. */ + RE_TRANSLATE_TYPE translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see + whether or not we should use the fastmap, so we don't set + this absolutely perfectly; see `re_compile_fastmap' (the + `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. 
*/ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the + beginning of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. */ + unsigned newline_anchor : 1; + +/* [[[end pattern_buffer]]] */ +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +#define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* To avoid duplicating every routine declaration -- once with a + prototype (if we are ANSI), and once without (if we aren't) -- we + use the following macro to declare argument types. This + unfortunately clutters up the declarations a bit, but I think it's + worth it. */ + +#if __STDC__ + +#define _RE_ARGS(args) args + +#else /* not __STDC__ */ + +#define _RE_ARGS(args) () + +#endif /* not __STDC__ */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. 
*/ +extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern + _RE_ARGS ((const char *pattern, size_t length, + struct re_pattern_buffer *buffer)); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, struct re_registers *regs)); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, int stop)); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs)); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. 
*/ +extern int re_match_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop)); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers + _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, + unsigned num_regs, regoff_t *starts, regoff_t *ends)); + +#ifdef _REGEX_RE_COMP +#ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp _RE_ARGS ((const char *)); +extern int re_exec _RE_ARGS ((const char *)); +#endif +#endif + +/* POSIX compatibility. */ +extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); +extern int regexec + _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags)); +extern size_t regerror + _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size)); +extern void regfree _RE_ARGS ((regex_t *preg)); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* not __REGEXP_LIBRARY_H__ */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ diff --git a/contrib/awk/test/ChangeLog b/contrib/awk/test/ChangeLog new file mode 100644 index 0000000..8efafaa --- /dev/null +++ b/contrib/awk/test/ChangeLog @@ -0,0 +1,252 @@ +Thu May 15 12:49:08 1997 Arnold D. Robbins + + * Release 3.0.3: Release tar file made. + +Tue May 13 12:53:46 1997 Arnold D. 
Robbins + + * Makefile.in (messages): more testing for OK failure on Linux. + +Sun May 11 14:57:11 1997 Arnold D. Robbins + + * Makefile.in (nondec): new test case. + * nondec.awk, nondec.ok: new files. + +Sun May 11 07:07:05 1997 Arnold D. Robbins + + * Makefile.in (prdupval): new test case. + * prdupval.awk, prdupval.in, prdupval.ok: new files. + +Wed May 7 21:54:34 1997 Arnold D. Robbins + + * Makefile.in (delarprm): new test case. + * delarprm.awk, delarprm.ok: new files. + +Wed May 7 17:54:00 1997 Arnold D. Robbins + + * Makefile.in (pid): several fixes from ghazi@caip.rutgers.edu. + +Tue May 6 20:28:30 1997 Arnold D. Robbins + + * Makefile.in (strftime): Use the right locale stuff. + (clobber): don't need an input file. + +Thu Apr 24 22:24:42 1997 Arnold D. Robbins + + * Makefile.in (pid): new test case, from jco@convex.com. + (specfile): removed test case, pid does it better. + * pid.awk, pid.ok, pid.sh: new files. + * specfile.awk: removed. + +Wed Apr 23 23:37:10 1997 Arnold D. Robbins + + * Makefile.in (pipeio2): new test case. + * pipeio2.awk, pipeio2.ok, pipeio2.in: new files. + +Sun Apr 20 12:22:52 1997 Arnold D. Robbins + + * Makefile.in (clobber): new test case. + * clobber.awk, clobber.ok: new files. + +Fri Apr 18 07:55:47 1997 Arnold D. Robbins + + * BETA Release 3.0.34: Release tar file made. + +Tue Apr 15 05:57:29 1997 Arnold D. Robbins + + * Makefile.in (strftlng): More wizardry for bizarre Unix systems. + (nlfldsep): use program and input file, not shell script + (basic, unix-tests, gawk.extensions): moved specfile, pipeio1 + and strftlng into unix-tests per Pat Rankin. + * nlfldsep.awk, nlfldsep.in: new files. + * nlfldsep.sh: removed. + +Wed Apr 9 23:32:47 1997 Arnold D. Robbins + + * Makefile.in (funstack): new test case. + * funstack.awk, funstack.in, funstack.ok: new files. + * substr.awk: added many more tests. + * substr.ok: updated + +Wed Mar 19 20:10:21 1997 Arnold D. Robbins + + * Makefile.in (pipeio1): new test case. 
+ * pipeio1.awk, pipeio1.ok: new files. + +Tue Mar 18 06:38:36 1997 Arnold D. Robbins + + * Makefile.in (noparm): new test case. + * noparm.awk, noparm.ok: new files. + +Fri Feb 21 06:30:18 1997 Arnold D. Robbins + + * Makefile.in (reint): new test case. + * reint.awk, reint.in, reint.ok: new files. + +Wed Feb 5 18:17:51 1997 Arnold D. Robbins + + * Makefile.in (fnarydel): new test case. + * fnarydel.awk, fnarydel.ok: new files. + +Sun Jan 19 17:06:18 1997 Arnold D. Robbins + + * Makefile.in (nors): new test case. + * nors.ok: new file. + +Sun Jan 19 17:06:18 1997 Arnold D. Robbins + + * Makefile.in (specfile, strftlng, nfldstr): new test cases. + * specfile.awk, strftlng.awk, strftlng.ok, nfldstr.ok: new files. + +Fri Dec 27 11:27:13 1996 Arnold D. Robbins + + * Makefile.in (intest): new test case. + * intest.awk, intest.ok: new files. + +Wed Dec 25 11:25:22 1996 Arnold D. Robbins + + * Release 3.0.2: Release tar file made. + +Tue Dec 10 23:09:26 1996 Arnold D. Robbins + + * Release 3.0.1: Release tar file made. + +Thu Nov 7 09:12:20 1996 Arnold D. Robbins + + * Makefile.in (splitvar): new test case. + * splitvar.awk, splitvar.in, splitvar.ok: new files. + +Sun Nov 3 10:55:50 1996 Arnold D. Robbins + + * Makefile.in (nlfldsep): new test case. + * nlfldsep.sh, nlfldsep.ok: new files. + +Fri Oct 25 10:29:56 1996 Arnold D. Robbins + + * rand.awk: call srand with fixed seed. + * rand.ok: new file. + * Makefile.in (rand): changed to compare output with rand.ok. + +Sat Oct 19 21:52:04 1996 Arnold D. Robbins + + * Makefile.in (tradanch): new test case. + * tradanch.awk, tradanch.in, tradanch.ok: new files. + +Thu Oct 17 21:22:05 1996 Arnold D. Robbins + + * tweakfld.awk: move `rm' out into Makefile.in. + * eofsplit.awk: fixed buggy code so won't loop forever. + * Makefile.in (all): add unix-tests. + (unix-tests): new target, has pound-bang, fflush, getlnhd. + (basic): removed fflush, getlnhd. + (tweakfld): added rm from tweakfld.awk. + +Sun Oct 6 22:00:35 1996 Arnold D. 
Robbins + + * Makefile.in (back89): new test case. + * back89.in, back89.ok: new files. + +Sun Oct 6 20:45:54 1996 Arnold D. Robbins + + * Makefile.in (splitwht): new test case. + * splitwht.awk, splitwht.ok: new files. + +Sun Sep 29 23:14:20 1996 Arnold D. Robbins + + * Makefile.in (gsubtest): new test case. + * gsubtest.awk, gsubtest.ok: new files. + +Fri Sep 20 11:58:40 1996 Arnold D. Robbins + + * Makefile.in (prtoeval): new test case. + * prtoeval.awk, prtoeval.ok: new files. + +Tue Sep 10 06:26:44 1996 Arnold D. Robbins + + * Makefile.in (gsubasgn): new test case. + * gsubasgn.awk, gsubasgn.ok: new files. + +Wed Aug 28 22:06:33 1996 Arnold D. Robbins + + * badargs.ok: updated output corresponding to change made to + main.c (see main ChangeLog). + +Thu Aug 1 07:20:28 1996 Arnold D. Robbins + + * Makefile.in (clean): remove out[123] files from `messages' test. + Thanks to Pat Rankin (rankin@eql.caltech.edu). + +Sat Jul 27 23:56:57 1996 Arnold D. Robbins + + * Makefile.in (prt1eval): new test case. + * prt1eval.awk, prt1eval.ok: new files. + +Mon Jul 22 22:06:10 1996 Arnold D. Robbins + + * Makefile.in (eofsplit): new test case. + * eofsplit.awk, eofsplit.ok: new files. + +Sun Jul 14 07:07:45 1996 Arnold D. Robbins + + * Makefile.in (fldchgnf): new test case. + * fldchgnf.awk, fldchgnf.ok: new files. + +Tue May 21 23:23:22 1996 Arnold D. Robbins + + * Makefile.in (substr): new test case. + * substr.awk, substr.ok: new files. + +Tue May 14 15:05:23 1996 Arnold D. Robbins + + * Makefile.in (dynlj): new test case. + * dynlj.awk, dynlj.ok: new files. + +Sun May 12 20:45:34 1996 Arnold D. Robbins + + * Makefile.in (fnarray): new test case. + * fnarray.awk, fnarray.ok: new files. + +Fri Mar 15 06:46:48 1996 Arnold D. Robbins + + * Makefile.in (clean): added `*~' to list of files to be removed. + * tweakfld.awk (END): added to do clean up action. + +Thu Mar 14 16:41:32 1996 Arnold D. Robbins + + * Makefile.in (mmap8k): new test case. 
+ * mmap8k.in, mmap8k.ok: new files. + +Sun Mar 10 22:58:35 1996 Arnold D. Robbins + + * Makefile.in (clsflnam): new test case. + * clsflnam.in, clsflnam.awk, clsflnam.ok: new files. + * tweakfld.awk: changed to have a fixed date. + +Thu Mar 7 09:56:09 1996 Arnold D. Robbins + + * Makefile.in (tweakfld): new test case. + * tweakfld.in, tweakfld.awk, tweakfld.ok: new files. + +Sun Mar 3 06:51:26 1996 Arnold D. Robbins + + * Makefile.in (getlnhd, backgsub) : new test cases. + * getlnhd.awk, getlnhd.ok: new files. + * backgsub.in, backgsub.awk, backgsub.ok: new files. + +Mon Feb 26 22:30:02 1996 Arnold D. Robbins + + * Makefile.in (sprintfc): new test case. + * sprintfc.in, sprintfc.awk, sprintfc.ok: new files. + * gensub.awk: updated for case of no match of regex. + +Wed Jan 24 10:06:16 1996 Arnold D. Robbins + + * Makefile.in (distclean, maintainer-clean): new targets. + (reindops): added test from Rick Adams (rick@uunet.uu.net). + (arrayparm, paramdup, defref, strftime, prmarscl, sclforin, + sclifin): Fix from Larry Schwimmer (schwim@cyclone.stanford.edu) + so that tests that are supposed to fail use `... || exit 0' to + cause a clean `make clean'. + +Wed Jan 10 22:58:55 1996 Arnold D. Robbins + + * ChangeLog created. diff --git a/contrib/awk/test/Makefile b/contrib/awk/test/Makefile new file mode 100644 index 0000000..1a9168e --- /dev/null +++ b/contrib/awk/test/Makefile @@ -0,0 +1,451 @@ +# Generated automatically from Makefile.in by configure. +# Makefile for GNU Awk test suite. +# +# Copyright (C) 1988-1997 the Free Software Foundation, Inc. +# +# This file is part of GAWK, the GNU implementation of the +# AWK Programming Language. +# +# GAWK is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# GAWK is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +SHELL = /bin/sh +AWK = ../gawk +CMP = cmp + +srcdir = . + +bigtest: basic unix-tests gawk.extensions + +basic: msg swaplns messages argarray longwrds \ + getline fstabplus compare arrayref rs fsrs rand \ + fsbs negexp asgext anchgsub splitargv awkpath nfset reparse \ + convfmt arrayparm paramdup nonl defref nofmtch litoct resplit \ + rswhite prmarscl sclforin sclifin intprec childin noeffect \ + numsubstr pcntplus prmreuse math fldchg fldchgnf reindops \ + sprintfc backgsub tweakfld clsflnam mmap8k fnarray \ + dynlj substr eofsplit prt1eval gsubasgn prtoeval gsubtest splitwht \ + back89 tradanch nlfldsep splitvar intest nfldstr nors fnarydel \ + noparms funstack clobber delarprm prdupval + +unix-tests: poundbang fflush getlnhd pipeio1 pipeio2 strftlng pid + +gawk.extensions: fieldwdth ignrcase posix manyfiles igncfs argtest \ + badargs strftime gensub gnureops reint nondec + +extra: regtest inftest + +poundbang:: + @cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@` + @rm -f /tmp/gawk + $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@` + +msg:: + @echo 'Any output from "cmp" is bad news, although some differences' + @echo 'in floating point values are probably benign -- in particular,' + @echo 'some systems may omit a leading zero and the floating point' + @echo 'precision may lead to slightly different output in a few cases.' 
+ +swaplns:: + @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@ + $(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@ + +messages:: + @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3 + { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && \ + $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3; } || \ + { { test -d /dev/fd || test -d /proc/self/fd; } && \ + echo IT IS OK THAT THIS TEST FAILED; } + +argarray:: + @case $(srcdir) in \ + .) : ;; \ + *) cp $(srcdir)/argarray.in . ;; \ + esac + @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@ + $(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@ + +fstabplus:: + @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@ + $(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@ + +fsrs:: + @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@ + $(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@ + +igncfs:: + @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@ + $(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@ + +longwrds:: + @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | sort >_$@ + $(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@ + +fieldwdth:: + @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@ + $(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@ + +ignrcase:: + @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@ + $(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@ + +regtest:: + @echo 'Some of the output from regtest is very system specific, do not' + @echo 'be distressed if your output differs from that distributed.' + @echo 'Manual inspection is called for.' 
+ AWK=`pwd`/$(AWK) $(srcdir)/regtest + +posix:: + @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@ + $(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@ + +manyfiles:: + @rm -rf junk + @mkdir junk + @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@ + @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@ + @echo "This number better be 1 ->" | tr -d '\012' + @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l + @rm -rf junk _$@ + +compare:: + @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@ + $(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@ + +arrayref:: + @$(AWK) -f $(srcdir)/arrayref.awk >_$@ + $(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@ + +rs:: + @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@ + $(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@ + +fsbs:: + @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@ + $(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@ + +inftest:: + @echo This test is very machine specific... + @$(AWK) -f $(srcdir)/inftest.awk >_$@ + $(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@ + +getline:: + @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@ + $(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@ + +rand:: + @$(AWK) -f $(srcdir)/rand.awk >_$@ + $(CMP) $(srcdir)/rand.ok _$@ && rm -f _$@ + +negexp:: + @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@ + $(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@ + +asgext:: + @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@ + $(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@ + +anchgsub:: + @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@ + $(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@ + +splitargv:: + @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@ + $(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@ + +awkpath:: + @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@ + $(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@ + +nfset:: + @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@ + $(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@ + +reparse:: + @$(AWK) -f 
$(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@ + $(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@ + +argtest:: + @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@ + $(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@ + +badargs:: + @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@ + $(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@ + +convfmt:: + @$(AWK) -f $(srcdir)/convfmt.awk >_$@ + $(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@ + +arrayparm:: + @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@ + +paramdup:: + @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@ + +nonl:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1 + $(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@ + +defref:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@ + +nofmtch:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1 + $(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@ + +strftime:: + : this test could fail on slow machines or on a second boundary, + : so if it does, double check the actual results + @LC_ALL=C; export LC_ALL; LANC=C; export LANG; \ + date | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; \ + print > "strftime.ok" ; \ + print strftime() > "'_$@'" }' + $(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok || exit 0 + +litoct:: + @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@ + $(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@ + +gensub:: + @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@ + $(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@ + +resplit:: + @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@ + $(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@ + +rswhite:: + @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@ + $(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@ + +prmarscl:: + @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/prmarscl.ok 
_$@ && rm -f _$@ + +sclforin:: + @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@ + +sclifin:: + @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@ + +intprec:: + @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1 + $(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@ + +childin:: + @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@ + $(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@ + +noeffect:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1 + $(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@ + +numsubstr:: + @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@ + $(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@ + +gnureops:: + @$(AWK) -f $(srcdir)/gnureops.awk >_$@ + $(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@ + +pcntplus:: + @$(AWK) -f $(srcdir)/pcntplus.awk >_$@ + $(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@ + +prmreuse:: + @$(AWK) -f $(srcdir)/prmreuse.awk >_$@ + $(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@ + +math:: + @$(AWK) -f $(srcdir)/math.awk >_$@ + $(CMP) $(srcdir)/math.ok _$@ && rm -f _$@ + +fflush:: + @$(srcdir)/fflush.sh >_$@ + $(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@ + +fldchg:: + @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@ + $(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@ + +fldchgnf:: + @$(AWK) -f $(srcdir)/fldchgnf.awk $(srcdir)/fldchgnf.in >_$@ + $(CMP) $(srcdir)/fldchgnf.ok _$@ && rm -f _$@ + +reindops:: + @$(AWK) -f $(srcdir)/reindops.awk $(srcdir)/reindops.in >_$@ + $(CMP) $(srcdir)/reindops.ok _$@ && rm -f _$@ + +sprintfc:: + @$(AWK) -f $(srcdir)/sprintfc.awk $(srcdir)/sprintfc.in >_$@ + $(CMP) $(srcdir)/sprintfc.ok _$@ && rm -f _$@ + +getlnhd:: + @$(AWK) -f $(srcdir)/getlnhd.awk >_$@ + $(CMP) $(srcdir)/getlnhd.ok _$@ && rm -f _$@ + +backgsub:: + @$(AWK) -f $(srcdir)/backgsub.awk $(srcdir)/backgsub.in >_$@ + $(CMP) $(srcdir)/backgsub.ok _$@ && rm -f _$@ + +tweakfld:: + 
@$(AWK) -f $(srcdir)/tweakfld.awk $(srcdir)/tweakfld.in >_$@ + @rm -f errors.cleanup + $(CMP) $(srcdir)/tweakfld.ok _$@ && rm -f _$@ + +clsflnam:: + @$(AWK) -f $(srcdir)/clsflnam.awk $(srcdir)/clsflnam.in >_$@ + $(CMP) $(srcdir)/clsflnam.ok _$@ && rm -f _$@ + +mmap8k:: + @$(AWK) '{ print }' $(srcdir)/mmap8k.in >_$@ + $(CMP) $(srcdir)/mmap8k.in _$@ && rm -f _$@ + +fnarray:: + @-AWKPATH=$(srcdir) $(AWK) -f fnarray.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/fnarray.ok _$@ && rm -f _$@ + +dynlj:: + @$(AWK) -f $(srcdir)/dynlj.awk >_$@ + $(CMP) $(srcdir)/dynlj.ok _$@ && rm -f _$@ + +substr:: + @$(AWK) -f $(srcdir)/substr.awk >_$@ + $(CMP) $(srcdir)/substr.ok _$@ && rm -f _$@ + +eofsplit:: + @$(AWK) -f $(srcdir)/eofsplit.awk >_$@ + $(CMP) $(srcdir)/eofsplit.ok _$@ && rm -f _$@ + +prt1eval:: + @$(AWK) -f $(srcdir)/prt1eval.awk >_$@ + $(CMP) $(srcdir)/prt1eval.ok _$@ && rm -f _$@ + +gsubasgn:: + @-AWKPATH=$(srcdir) $(AWK) -f gsubasgn.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/gsubasgn.ok _$@ && rm -f _$@ + +prtoeval:: + @$(AWK) -f $(srcdir)/prtoeval.awk >_$@ + $(CMP) $(srcdir)/prtoeval.ok _$@ && rm -f _$@ + +gsubtest:: + @$(AWK) -f $(srcdir)/gsubtest.awk >_$@ + $(CMP) $(srcdir)/gsubtest.ok _$@ && rm -f _$@ + +splitwht:: + @$(AWK) -f $(srcdir)/splitwht.awk >_$@ + $(CMP) $(srcdir)/splitwht.ok _$@ && rm -f _$@ + +back89:: + @$(AWK) '/a\8b/' $(srcdir)/back89.in >_$@ + $(CMP) $(srcdir)/back89.ok _$@ && rm -f _$@ + +tradanch:: + @$(AWK) --traditional -f $(srcdir)/tradanch.awk $(srcdir)/tradanch.in >_$@ + $(CMP) $(srcdir)/tradanch.ok _$@ && rm -f _$@ + +nlfldsep:: + @$(AWK) -f $(srcdir)/nlfldsep.awk $(srcdir)/nlfldsep.in > _$@ + $(CMP) $(srcdir)/nlfldsep.ok _$@ && rm -f _$@ + +splitvar:: + @$(AWK) -f $(srcdir)/splitvar.awk $(srcdir)/splitvar.in >_$@ + $(CMP) $(srcdir)/splitvar.ok _$@ && rm -f _$@ + +intest:: + @$(AWK) -f $(srcdir)/intest.awk >_$@ + $(CMP) $(srcdir)/intest.ok _$@ && rm -f _$@ + +# AIX /bin/sh exec's the last command in a list, therefore issue a ":" +# command so 
that pid.sh is fork'ed as a child before being exec'ed. +pid:: + @AWKPATH=$(srcdir) AWK=$(AWK) $(SHELL) $(srcdir)/pid.sh $$$$ > _`basename $@` ; : + $(CMP) $(srcdir)/pid.ok _`basename $@` && rm -f _`basename $@` _`basename $@`.in + +strftlng:: + @TZ=UTC; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ + @if $(CMP) -s $(srcdir)/strftlng.ok _$@ ; then : ; else \ + TZ=UTC0; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ ; \ + fi + $(CMP) $(srcdir)/strftlng.ok _$@ && rm -f _$@ + +nfldstr:: + @echo | $(AWK) '$$1 == 0 { print "bug" }' > _$@ + $(CMP) $(srcdir)/nfldstr.ok _$@ && rm -f _$@ + +nors:: + @echo A B C D E | tr -d '\12' | $(AWK) '{ print $$NF }' - $(srcdir)/nors.in > _$@ + $(CMP) $(srcdir)/nors.ok _$@ && rm -f _$@ + +fnarydel:: + @$(AWK) -f $(srcdir)/fnarydel.awk >_$@ + $(CMP) $(srcdir)/fnarydel.ok _$@ && rm -f _$@ + +reint:: + @$(AWK) --re-interval -f $(srcdir)/reint.awk $(srcdir)/reint.in >_$@ + $(CMP) $(srcdir)/reint.ok _$@ && rm -f _$@ + +noparms:: + @-AWKPATH=$(srcdir) $(AWK) -f noparms.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/noparms.ok _$@ && rm -f _$@ + +pipeio1:: + @$(AWK) -f $(srcdir)/pipeio1.awk >_$@ + @rm -f test1 test2 + $(CMP) $(srcdir)/pipeio1.ok _$@ && rm -f _$@ + +pipeio2:: + @$(AWK) -v SRCDIR=$(srcdir) -f $(srcdir)/pipeio2.awk >_$@ + $(CMP) $(srcdir)/pipeio2.ok _$@ && rm -f _$@ + +funstack:: + @$(AWK) -f $(srcdir)/funstack.awk $(srcdir)/funstack.in >_$@ + $(CMP) $(srcdir)/funstack.ok _$@ && rm -f _$@ + +clobber:: + @$(AWK) -f $(srcdir)/clobber.awk >_$@ + $(CMP) $(srcdir)/clobber.ok seq && $(CMP) $(srcdir)/clobber.ok _$@ && rm -f _$@ + @rm -f seq + +delarprm:: + @$(AWK) -f $(srcdir)/delarprm.awk >_$@ + $(CMP) $(srcdir)/delarprm.ok _$@ && rm -f _$@ + +prdupval:: + @$(AWK) -f $(srcdir)/prdupval.awk $(srcdir)/prdupval.in >_$@ + $(CMP) $(srcdir)/prdupval.ok _$@ && rm -f _$@ + +nondec:: + @if grep BITOP ../config.h | grep define > /dev/null; \ + then \ + $(AWK) -f $(srcdir)/nondec.awk >_$@; \ + else \ + cp $(srcdir)/nondec.ok _$@; \ + fi + $(CMP) 
$(srcdir)/nondec.ok _$@ && rm -f _$@ + +clean: + rm -fr _* core junk out1 out2 out3 strftime.ok test1 test2 seq *~ + +distclean: clean + rm -f Makefile + +maintainer-clean: distclean diff --git a/contrib/awk/test/Makefile.in b/contrib/awk/test/Makefile.in new file mode 100644 index 0000000..07d0cbc --- /dev/null +++ b/contrib/awk/test/Makefile.in @@ -0,0 +1,451 @@ +# Makefile for GNU Awk test suite. +# +# Copyright (C) 1988-1997 the Free Software Foundation, Inc. +# +# This file is part of GAWK, the GNU implementation of the +# AWK Programming Language. +# +# GAWK is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GAWK is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +SHELL = /bin/sh +AWK = ../gawk +CMP = cmp + +srcdir = @srcdir@ +VPATH = @srcdir@ + +bigtest: basic unix-tests gawk.extensions + +basic: msg swaplns messages argarray longwrds \ + getline fstabplus compare arrayref rs fsrs rand \ + fsbs negexp asgext anchgsub splitargv awkpath nfset reparse \ + convfmt arrayparm paramdup nonl defref nofmtch litoct resplit \ + rswhite prmarscl sclforin sclifin intprec childin noeffect \ + numsubstr pcntplus prmreuse math fldchg fldchgnf reindops \ + sprintfc backgsub tweakfld clsflnam mmap8k fnarray \ + dynlj substr eofsplit prt1eval gsubasgn prtoeval gsubtest splitwht \ + back89 tradanch nlfldsep splitvar intest nfldstr nors fnarydel \ + noparms funstack clobber delarprm prdupval + +unix-tests: poundbang fflush getlnhd pipeio1 pipeio2 strftlng pid + +gawk.extensions: fieldwdth ignrcase posix manyfiles igncfs argtest \ + badargs strftime gensub gnureops reint nondec + +extra: regtest inftest + +poundbang:: + @cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@` + @rm -f /tmp/gawk + $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@` + +msg:: + @echo 'Any output from "cmp" is bad news, although some differences' + @echo 'in floating point values are probably benign -- in particular,' + @echo 'some systems may omit a leading zero and the floating point' + @echo 'precision may lead to slightly different output in a few cases.' 
+ +swaplns:: + @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@ + $(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@ + +messages:: + @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3 + { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && \ + $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3; } || \ + { { test -d /dev/fd || test -d /proc/self/fd; } && \ + echo IT IS OK THAT THIS TEST FAILED; } + +argarray:: + @case $(srcdir) in \ + .) : ;; \ + *) cp $(srcdir)/argarray.in . ;; \ + esac + @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@ + $(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@ + +fstabplus:: + @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@ + $(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@ + +fsrs:: + @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@ + $(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@ + +igncfs:: + @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@ + $(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@ + +longwrds:: + @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | sort >_$@ + $(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@ + +fieldwdth:: + @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@ + $(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@ + +ignrcase:: + @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@ + $(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@ + +regtest:: + @echo 'Some of the output from regtest is very system specific, do not' + @echo 'be distressed if your output differs from that distributed.' + @echo 'Manual inspection is called for.' 
+ AWK=`pwd`/$(AWK) $(srcdir)/regtest + +posix:: + @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@ + $(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@ + +manyfiles:: + @rm -rf junk + @mkdir junk + @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@ + @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@ + @echo "This number better be 1 ->" | tr -d '\012' + @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l + @rm -rf junk _$@ + +compare:: + @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@ + $(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@ + +arrayref:: + @$(AWK) -f $(srcdir)/arrayref.awk >_$@ + $(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@ + +rs:: + @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@ + $(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@ + +fsbs:: + @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@ + $(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@ + +inftest:: + @echo This test is very machine specific... + @$(AWK) -f $(srcdir)/inftest.awk >_$@ + $(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@ + +getline:: + @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@ + $(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@ + +rand:: + @$(AWK) -f $(srcdir)/rand.awk >_$@ + $(CMP) $(srcdir)/rand.ok _$@ && rm -f _$@ + +negexp:: + @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@ + $(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@ + +asgext:: + @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@ + $(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@ + +anchgsub:: + @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@ + $(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@ + +splitargv:: + @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@ + $(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@ + +awkpath:: + @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@ + $(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@ + +nfset:: + @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@ + $(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@ + +reparse:: + @$(AWK) -f 
$(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@ + $(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@ + +argtest:: + @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@ + $(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@ + +badargs:: + @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@ + $(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@ + +convfmt:: + @$(AWK) -f $(srcdir)/convfmt.awk >_$@ + $(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@ + +arrayparm:: + @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@ + +paramdup:: + @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@ + +nonl:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1 + $(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@ + +defref:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@ + +nofmtch:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1 + $(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@ + +strftime:: + : this test could fail on slow machines or on a second boundary, + : so if it does, double check the actual results + @LC_ALL=C; export LC_ALL; LANC=C; export LANG; \ + date | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; \ + print > "strftime.ok" ; \ + print strftime() > "'_$@'" }' + $(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok || exit 0 + +litoct:: + @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@ + $(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@ + +gensub:: + @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@ + $(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@ + +resplit:: + @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@ + $(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@ + +rswhite:: + @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@ + $(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@ + +prmarscl:: + @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/prmarscl.ok 
_$@ && rm -f _$@ + +sclforin:: + @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@ + +sclifin:: + @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@ + +intprec:: + @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1 + $(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@ + +childin:: + @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@ + $(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@ + +noeffect:: + @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1 + $(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@ + +numsubstr:: + @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@ + $(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@ + +gnureops:: + @$(AWK) -f $(srcdir)/gnureops.awk >_$@ + $(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@ + +pcntplus:: + @$(AWK) -f $(srcdir)/pcntplus.awk >_$@ + $(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@ + +prmreuse:: + @$(AWK) -f $(srcdir)/prmreuse.awk >_$@ + $(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@ + +math:: + @$(AWK) -f $(srcdir)/math.awk >_$@ + $(CMP) $(srcdir)/math.ok _$@ && rm -f _$@ + +fflush:: + @$(srcdir)/fflush.sh >_$@ + $(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@ + +fldchg:: + @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@ + $(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@ + +fldchgnf:: + @$(AWK) -f $(srcdir)/fldchgnf.awk $(srcdir)/fldchgnf.in >_$@ + $(CMP) $(srcdir)/fldchgnf.ok _$@ && rm -f _$@ + +reindops:: + @$(AWK) -f $(srcdir)/reindops.awk $(srcdir)/reindops.in >_$@ + $(CMP) $(srcdir)/reindops.ok _$@ && rm -f _$@ + +sprintfc:: + @$(AWK) -f $(srcdir)/sprintfc.awk $(srcdir)/sprintfc.in >_$@ + $(CMP) $(srcdir)/sprintfc.ok _$@ && rm -f _$@ + +getlnhd:: + @$(AWK) -f $(srcdir)/getlnhd.awk >_$@ + $(CMP) $(srcdir)/getlnhd.ok _$@ && rm -f _$@ + +backgsub:: + @$(AWK) -f $(srcdir)/backgsub.awk $(srcdir)/backgsub.in >_$@ + $(CMP) $(srcdir)/backgsub.ok _$@ && rm -f _$@ + +tweakfld:: + 
@$(AWK) -f $(srcdir)/tweakfld.awk $(srcdir)/tweakfld.in >_$@ + @rm -f errors.cleanup + $(CMP) $(srcdir)/tweakfld.ok _$@ && rm -f _$@ + +clsflnam:: + @$(AWK) -f $(srcdir)/clsflnam.awk $(srcdir)/clsflnam.in >_$@ + $(CMP) $(srcdir)/clsflnam.ok _$@ && rm -f _$@ + +mmap8k:: + @$(AWK) '{ print }' $(srcdir)/mmap8k.in >_$@ + $(CMP) $(srcdir)/mmap8k.in _$@ && rm -f _$@ + +fnarray:: + @-AWKPATH=$(srcdir) $(AWK) -f fnarray.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/fnarray.ok _$@ && rm -f _$@ + +dynlj:: + @$(AWK) -f $(srcdir)/dynlj.awk >_$@ + $(CMP) $(srcdir)/dynlj.ok _$@ && rm -f _$@ + +substr:: + @$(AWK) -f $(srcdir)/substr.awk >_$@ + $(CMP) $(srcdir)/substr.ok _$@ && rm -f _$@ + +eofsplit:: + @$(AWK) -f $(srcdir)/eofsplit.awk >_$@ + $(CMP) $(srcdir)/eofsplit.ok _$@ && rm -f _$@ + +prt1eval:: + @$(AWK) -f $(srcdir)/prt1eval.awk >_$@ + $(CMP) $(srcdir)/prt1eval.ok _$@ && rm -f _$@ + +gsubasgn:: + @-AWKPATH=$(srcdir) $(AWK) -f gsubasgn.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/gsubasgn.ok _$@ && rm -f _$@ + +prtoeval:: + @$(AWK) -f $(srcdir)/prtoeval.awk >_$@ + $(CMP) $(srcdir)/prtoeval.ok _$@ && rm -f _$@ + +gsubtest:: + @$(AWK) -f $(srcdir)/gsubtest.awk >_$@ + $(CMP) $(srcdir)/gsubtest.ok _$@ && rm -f _$@ + +splitwht:: + @$(AWK) -f $(srcdir)/splitwht.awk >_$@ + $(CMP) $(srcdir)/splitwht.ok _$@ && rm -f _$@ + +back89:: + @$(AWK) '/a\8b/' $(srcdir)/back89.in >_$@ + $(CMP) $(srcdir)/back89.ok _$@ && rm -f _$@ + +tradanch:: + @$(AWK) --traditional -f $(srcdir)/tradanch.awk $(srcdir)/tradanch.in >_$@ + $(CMP) $(srcdir)/tradanch.ok _$@ && rm -f _$@ + +nlfldsep:: + @$(AWK) -f $(srcdir)/nlfldsep.awk $(srcdir)/nlfldsep.in > _$@ + $(CMP) $(srcdir)/nlfldsep.ok _$@ && rm -f _$@ + +splitvar:: + @$(AWK) -f $(srcdir)/splitvar.awk $(srcdir)/splitvar.in >_$@ + $(CMP) $(srcdir)/splitvar.ok _$@ && rm -f _$@ + +intest:: + @$(AWK) -f $(srcdir)/intest.awk >_$@ + $(CMP) $(srcdir)/intest.ok _$@ && rm -f _$@ + +# AIX /bin/sh exec's the last command in a list, therefore issue a ":" +# command so 
that pid.sh is fork'ed as a child before being exec'ed. +pid:: + @AWKPATH=$(srcdir) AWK=$(AWK) $(SHELL) $(srcdir)/pid.sh $$$$ > _`basename $@` ; : + $(CMP) $(srcdir)/pid.ok _`basename $@` && rm -f _`basename $@` _`basename $@`.in + +strftlng:: + @TZ=UTC; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ + @if $(CMP) -s $(srcdir)/strftlng.ok _$@ ; then : ; else \ + TZ=UTC0; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ ; \ + fi + $(CMP) $(srcdir)/strftlng.ok _$@ && rm -f _$@ + +nfldstr:: + @echo | $(AWK) '$$1 == 0 { print "bug" }' > _$@ + $(CMP) $(srcdir)/nfldstr.ok _$@ && rm -f _$@ + +nors:: + @echo A B C D E | tr -d '\12' | $(AWK) '{ print $$NF }' - $(srcdir)/nors.in > _$@ + $(CMP) $(srcdir)/nors.ok _$@ && rm -f _$@ + +fnarydel:: + @$(AWK) -f $(srcdir)/fnarydel.awk >_$@ + $(CMP) $(srcdir)/fnarydel.ok _$@ && rm -f _$@ + +reint:: + @$(AWK) --re-interval -f $(srcdir)/reint.awk $(srcdir)/reint.in >_$@ + $(CMP) $(srcdir)/reint.ok _$@ && rm -f _$@ + +noparms:: + @-AWKPATH=$(srcdir) $(AWK) -f noparms.awk >_$@ 2>&1 || exit 0 + $(CMP) $(srcdir)/noparms.ok _$@ && rm -f _$@ + +pipeio1:: + @$(AWK) -f $(srcdir)/pipeio1.awk >_$@ + @rm -f test1 test2 + $(CMP) $(srcdir)/pipeio1.ok _$@ && rm -f _$@ + +pipeio2:: + @$(AWK) -v SRCDIR=$(srcdir) -f $(srcdir)/pipeio2.awk >_$@ + $(CMP) $(srcdir)/pipeio2.ok _$@ && rm -f _$@ + +funstack:: + @$(AWK) -f $(srcdir)/funstack.awk $(srcdir)/funstack.in >_$@ + $(CMP) $(srcdir)/funstack.ok _$@ && rm -f _$@ + +clobber:: + @$(AWK) -f $(srcdir)/clobber.awk >_$@ + $(CMP) $(srcdir)/clobber.ok seq && $(CMP) $(srcdir)/clobber.ok _$@ && rm -f _$@ + @rm -f seq + +delarprm:: + @$(AWK) -f $(srcdir)/delarprm.awk >_$@ + $(CMP) $(srcdir)/delarprm.ok _$@ && rm -f _$@ + +prdupval:: + @$(AWK) -f $(srcdir)/prdupval.awk $(srcdir)/prdupval.in >_$@ + $(CMP) $(srcdir)/prdupval.ok _$@ && rm -f _$@ + +nondec:: + @if grep BITOP ../config.h | grep define > /dev/null; \ + then \ + $(AWK) -f $(srcdir)/nondec.awk >_$@; \ + else \ + cp $(srcdir)/nondec.ok _$@; \ + fi + $(CMP) 
$(srcdir)/nondec.ok _$@ && rm -f _$@ + +clean: + rm -fr _* core junk out1 out2 out3 strftime.ok test1 test2 seq *~ + +distclean: clean + rm -f Makefile + +maintainer-clean: distclean diff --git a/contrib/awk/test/README b/contrib/awk/test/README new file mode 100644 index 0000000..2343be2 --- /dev/null +++ b/contrib/awk/test/README @@ -0,0 +1,18 @@ +Mon Jan 22 13:08:58 EST 1996 + +This directory contains the tests for gawk. The tests use the +following conventions. + +Given some aspect of gawk named `foo', there will be one or more +of the following files: + +foo.awk --- actual code for the test if not inline in the Makefile +foo.in --- the data for the test, if it needs data +foo.ok --- the expected results +_foo --- the actual results; generated at run time + +The _foo file will be left around if a test fails, allowing you to +compare actual and expected results, in case they differ. + +If they do differ (other than strftime.ok and _strftime!), send in a +bug report. See the manual for the bug report procedure. diff --git a/contrib/awk/test/anchgsub.awk b/contrib/awk/test/anchgsub.awk new file mode 100644 index 0000000..52e8aa4 --- /dev/null +++ b/contrib/awk/test/anchgsub.awk @@ -0,0 +1 @@ +{ gsub(/^[ ]*/, "", $0) ; print } diff --git a/contrib/awk/test/anchgsub.in b/contrib/awk/test/anchgsub.in new file mode 100644 index 0000000..b829d84 --- /dev/null +++ b/contrib/awk/test/anchgsub.in @@ -0,0 +1 @@ + This is a test, this is only a test. diff --git a/contrib/awk/test/anchgsub.ok b/contrib/awk/test/anchgsub.ok new file mode 100644 index 0000000..c33dfb9 --- /dev/null +++ b/contrib/awk/test/anchgsub.ok @@ -0,0 +1 @@ +This is a test, this is only a test. diff --git a/contrib/awk/test/argarray.awk b/contrib/awk/test/argarray.awk new file mode 100644 index 0000000..1960f9bd --- /dev/null +++ b/contrib/awk/test/argarray.awk @@ -0,0 +1,14 @@ +BEGIN { + argn = " argument" (ARGC > 1 ? "s" : "") + are = ARGC > 1 ? 
"are" : "is" + print "here we have " ARGC argn + print "which " are + for (x = 0; x < ARGC; x++) + print "\t", ARGV[x] + print "Environment variable TEST=" ENVIRON["TEST"] + print "and the current input file is called \"" FILENAME "\"" +} + +FNR == 1 { + print "in main loop, this input file is known as \"" FILENAME "\"" +} diff --git a/contrib/awk/test/argarray.in b/contrib/awk/test/argarray.in new file mode 100644 index 0000000..bc93338 --- /dev/null +++ b/contrib/awk/test/argarray.in @@ -0,0 +1 @@ +this is a simple test file diff --git a/contrib/awk/test/argarray.ok b/contrib/awk/test/argarray.ok new file mode 100644 index 0000000..18eb841 --- /dev/null +++ b/contrib/awk/test/argarray.ok @@ -0,0 +1,9 @@ +here we have 3 arguments +which are + gawk + ./argarray.in + - +Environment variable TEST= +and the current input file is called "" +in main loop, this input file is known as "./argarray.in" +in main loop, this input file is known as "-" diff --git a/contrib/awk/test/argtest.awk b/contrib/awk/test/argtest.awk new file mode 100644 index 0000000..e7a1145 --- /dev/null +++ b/contrib/awk/test/argtest.awk @@ -0,0 +1,4 @@ +BEGIN { + for (i = 0; i < ARGC; i++) + printf("ARGV[%d] = %s\n", i, ARGV[i]) +} diff --git a/contrib/awk/test/argtest.ok b/contrib/awk/test/argtest.ok new file mode 100644 index 0000000..591bc64 --- /dev/null +++ b/contrib/awk/test/argtest.ok @@ -0,0 +1,4 @@ +ARGV[0] = gawk +ARGV[1] = -x +ARGV[2] = -y +ARGV[3] = abc diff --git a/contrib/awk/test/arrayparm.awk b/contrib/awk/test/arrayparm.awk new file mode 100644 index 0000000..d6f34d9 --- /dev/null +++ b/contrib/awk/test/arrayparm.awk @@ -0,0 +1,21 @@ +# +# Test program from: +# +# Date: Tue, 21 Feb 95 16:09:29 EST +# From: emory!blackhawk.com!aaron (Aaron Sosnick) +# +BEGIN { + foo[1]=1; + foo[2]=2; + bug1(foo); +} +function bug1(i) { + for (i in foo) { + bug2(i); + delete foo[i]; + print i,1,bot[1]; + } +} +function bug2(arg) { + bot[arg]=arg; +} diff --git a/contrib/awk/test/arrayparm.ok 
b/contrib/awk/test/arrayparm.ok new file mode 100644 index 0000000..b315f7c --- /dev/null +++ b/contrib/awk/test/arrayparm.ok @@ -0,0 +1 @@ +gawk: arrayparm.awk:18: fatal: attempt to use array `foo' in a scalar context diff --git a/contrib/awk/test/arrayref.awk b/contrib/awk/test/arrayref.awk new file mode 100644 index 0000000..144d41a --- /dev/null +++ b/contrib/awk/test/arrayref.awk @@ -0,0 +1,13 @@ + BEGIN { # foo[10] = 0 # put this line in and it will work + test(foo); print foo[1] + test2(foo2); print foo2[1] + } + + function test(foo) + { + test2(foo) + } + function test2(bar) + { + bar[1] = 1 + } diff --git a/contrib/awk/test/arrayref.ok b/contrib/awk/test/arrayref.ok new file mode 100644 index 0000000..6ed281c --- /dev/null +++ b/contrib/awk/test/arrayref.ok @@ -0,0 +1,2 @@ +1 +1 diff --git a/contrib/awk/test/asgext.awk b/contrib/awk/test/asgext.awk new file mode 100644 index 0000000..c7f1775 --- /dev/null +++ b/contrib/awk/test/asgext.awk @@ -0,0 +1 @@ +{ print $3; $4 = "a"; print } diff --git a/contrib/awk/test/asgext.in b/contrib/awk/test/asgext.in new file mode 100644 index 0000000..3743b5b --- /dev/null +++ b/contrib/awk/test/asgext.in @@ -0,0 +1,3 @@ +1 2 3 +1 +1 2 3 4 diff --git a/contrib/awk/test/asgext.ok b/contrib/awk/test/asgext.ok new file mode 100644 index 0000000..2c0df70 --- /dev/null +++ b/contrib/awk/test/asgext.ok @@ -0,0 +1,6 @@ +3 +1 2 3 a + +1 a +3 +1 2 3 a diff --git a/contrib/awk/test/awkpath.ok b/contrib/awk/test/awkpath.ok new file mode 100644 index 0000000..6cffe1b --- /dev/null +++ b/contrib/awk/test/awkpath.ok @@ -0,0 +1 @@ +Found it. 
diff --git a/contrib/awk/test/back89.in b/contrib/awk/test/back89.in new file mode 100644 index 0000000..b0a88f2 --- /dev/null +++ b/contrib/awk/test/back89.in @@ -0,0 +1,2 @@ +a8b +a\8b diff --git a/contrib/awk/test/back89.ok b/contrib/awk/test/back89.ok new file mode 100644 index 0000000..e9ea4d5 --- /dev/null +++ b/contrib/awk/test/back89.ok @@ -0,0 +1 @@ +a8b diff --git a/contrib/awk/test/backgsub.awk b/contrib/awk/test/backgsub.awk new file mode 100644 index 0000000..bec7354 --- /dev/null +++ b/contrib/awk/test/backgsub.awk @@ -0,0 +1,4 @@ +{ + gsub( "\\\\", "\\\\") + print +} diff --git a/contrib/awk/test/backgsub.in b/contrib/awk/test/backgsub.in new file mode 100644 index 0000000..2d3f17f --- /dev/null +++ b/contrib/awk/test/backgsub.in @@ -0,0 +1 @@ +\x\y\z diff --git a/contrib/awk/test/backgsub.ok b/contrib/awk/test/backgsub.ok new file mode 100644 index 0000000..e2e265f --- /dev/null +++ b/contrib/awk/test/backgsub.ok @@ -0,0 +1 @@ +\\x\\y\\z diff --git a/contrib/awk/test/badargs.ok b/contrib/awk/test/badargs.ok new file mode 100644 index 0000000..c89e520 --- /dev/null +++ b/contrib/awk/test/badargs.ok @@ -0,0 +1,23 @@ +gawk: option requires an argument -- f +Usage: gawk [POSIX or GNU style options] -f progfile [--] file ... + gawk [POSIX or GNU style options] [--] 'program' file ... 
+POSIX options: GNU long options: + -f progfile --file=progfile + -F fs --field-separator=fs + -v var=val --assign=var=val + -m[fr] val + -W compat --compat + -W copyleft --copyleft + -W copyright --copyright + -W help --help + -W lint --lint + -W lint-old --lint-old + -W posix --posix + -W re-interval --re-interval + -W source=program-text --source=program-text + -W traditional --traditional + -W usage --usage + -W version --version + +Report bugs to bug-gnu-utils@prep.ai.mit.edu, +with a Cc: to arnold@gnu.ai.mit.edu diff --git a/contrib/awk/test/childin.ok b/contrib/awk/test/childin.ok new file mode 100644 index 0000000..45b983b --- /dev/null +++ b/contrib/awk/test/childin.ok @@ -0,0 +1 @@ +hi diff --git a/contrib/awk/test/clobber.awk b/contrib/awk/test/clobber.awk new file mode 100644 index 0000000..d6635f2 --- /dev/null +++ b/contrib/awk/test/clobber.awk @@ -0,0 +1,98 @@ +BEGIN { + print "000800" > "seq" + close("seq") + ARGV[1] = "seq" + ARGC = 2 +} + +{ printf "%06d", $1 + 1 >"seq"; + printf "%06d", $1 + 1 } +# Date: Mon, 20 Jan 1997 15:14:06 -0600 (CST) +# From: Dave Bodenstab +# To: bug-gnu-utils@prep.ai.mit.edu +# Subject: GNU awk 3.0.2 core dump +# Cc: arnold@gnu.ai.mit.edu +# +# The following program produces a core file on my FreeBSD system: +# +# bash$ echo 000800 >/tmp/seq +# bash$ gawk '{ printf "%06d", $1 + 1 >"/tmp/seq"; +# printf "%06d", $1 + 1 }' /tmp/seq +# +# This fragment comes from mgetty+sendfax. +# +# Here is the trace: +# +# Script started on Mon Jan 20 15:09:04 1997 +# bash$ gawk --version +# GNU Awk 3.0.2 +# Copyright (C) 1989, 1991-1996 Free Software Foundation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# bash$ gdb gawk +# GDB is free software and you are welcome to distribute copies of it +# under certain conditions; type "show copying" to see the conditions. +# There is absolutely no warranty for GDB; type "show warranty" for details. +# GDB 4.13 (i386-unknown-freebsd), +# Copyright 1994 Free Software Foundation, Inc... +# (gdb) shell echo 000800 >/tmp/seq +# (gdb) r '{ printf "%06d", $1 + 1 >"/tmp/seq"; printf "%06d", $1 + 1 }(gdb) r '{ printf "%06d", $1 + 1 >"/tmp/seq"; printf "%06d", $1 + 1 }' /tmp/seq +# Starting program: /scratch/archive/src/cmd/gnuawk-3.0.2/gawk '{ printf "%06d", $1 + 1 >"/tmp/seq"; printf "%06d", $1 + 1 }' /tmp/seq +# +# Program received signal SIGBUS, Bus error. 
+# 0xd86f in def_parse_field (up_to=1, buf=0x37704, len=6, fs=0x3b240, rp=0x0, +# set=0xce6c , n=0x0) at field.c:391 +# 391 sav = *end; +# (gdb) bt +# #0 0xd86f in def_parse_field (up_to=1, buf=0x37704, len=6, fs=0x3b240, +# rp=0x0, set=0xce6c , n=0x0) at field.c:391 +# #1 0xddb1 in get_field (requested=1, assign=0x0) at field.c:669 +# #2 0xc25d in r_get_lhs (ptr=0x3b9b4, assign=0x0) at eval.c:1339 +# #3 0x9ab0 in r_tree_eval (tree=0x3b9b4, iscond=0) at eval.c:604 +# #4 0xa5f1 in r_tree_eval (tree=0x3b9fc, iscond=0) at eval.c:745 +# #5 0x4661 in format_tree (fmt_string=0x3e040 "%06d", n0=0, carg=0x3ba20) +# at builtin.c:620 +# #6 0x5beb in do_sprintf (tree=0x3b96c) at builtin.c:809 +# #7 0x5cd5 in do_printf (tree=0x3ba8c) at builtin.c:844 +# #8 0x9271 in interpret (tree=0x3ba8c) at eval.c:465 +# #9 0x8ca3 in interpret (tree=0x3bbd0) at eval.c:308 +# #10 0x8c34 in interpret (tree=0x3bc18) at eval.c:292 +# #11 0xf069 in do_input () at io.c:312 +# #12 0x12ba9 in main (argc=3, argv=0xefbfd538) at main.c:393 +# (gdb) l +# 386 *buf += len; +# 387 return nf; +# 388 } +# 389 +# 390 /* before doing anything save the char at *end */ +# 391 sav = *end; +# 392 /* because it will be destroyed now: */ +# 393 +# 394 *end = ' '; /* sentinel character */ +# 395 for (; nf < up_to; scan++) { +# (gdb) print end +# $1 = 0x804d006 +# (gdb) print buf +# $2 = (char **) 0x37704 +# (gdb) print *buf +# $3 = 0x804d000 +# (gdb) q +# The program is running. Quit anyway (and kill it)? (y or n) y +# bash$ exit +# +# Script done on Mon Jan 20 15:11:07 1997 +# +# Dave Bodenstab +# imdave@synet.net diff --git a/contrib/awk/test/clobber.ok b/contrib/awk/test/clobber.ok new file mode 100644 index 0000000..7105708 --- /dev/null +++ b/contrib/awk/test/clobber.ok @@ -0,0 +1 @@ +000801 \ No newline at end of file diff --git a/contrib/awk/test/clsflnam.awk b/contrib/awk/test/clsflnam.awk new file mode 100644 index 0000000..5392891 --- /dev/null +++ b/contrib/awk/test/clsflnam.awk @@ -0,0 +1,12 @@ +#! 
/usr/bin/awk -f +BEGIN { + getline +# print ("FILENAME =", FILENAME) > "/dev/stderr" + #Rewind the file + if (close(FILENAME)) { + print "Error " ERRNO " closing input file" > "/dev/stderr"; + exit; + } +} +{ print "Analysing ", $0 } + diff --git a/contrib/awk/test/clsflnam.in b/contrib/awk/test/clsflnam.in new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/contrib/awk/test/clsflnam.in @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/contrib/awk/test/clsflnam.ok b/contrib/awk/test/clsflnam.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/compare.awk b/contrib/awk/test/compare.awk new file mode 100644 index 0000000..39a88f3 --- /dev/null +++ b/contrib/awk/test/compare.awk @@ -0,0 +1,13 @@ +BEGIN { + if (ARGV[1]) print 1 + ARGV[1] = "" + if (ARGV[2]) print 2 + ARGV[2] = "" + if ("0") print "zero" + if ("") print "null" + if (0) print 0 +} +{ + if ($0) print $0 + if ($1) print $1 +} diff --git a/contrib/awk/test/compare.in b/contrib/awk/test/compare.in new file mode 100644 index 0000000..1ab098b --- /dev/null +++ b/contrib/awk/test/compare.in @@ -0,0 +1,4 @@ +0 +1 +0 1 + diff --git a/contrib/awk/test/compare.ok b/contrib/awk/test/compare.ok new file mode 100644 index 0000000..8241359 --- /dev/null +++ b/contrib/awk/test/compare.ok @@ -0,0 +1,5 @@ +2 +zero +1 +1 +0 1 diff --git a/contrib/awk/test/convfmt.awk b/contrib/awk/test/convfmt.awk new file mode 100644 index 0000000..90fd204 --- /dev/null +++ b/contrib/awk/test/convfmt.awk @@ -0,0 +1,10 @@ +BEGIN { + CONVFMT = "%2.2f" + a = 123.456 + b = a "" # give `a' string value also + printf "a = %s\n", a + CONVFMT = "%.6g" + printf "a = %s\n", a + a += 0 # make `a' numeric only again + printf "a = %s\n", a # use `a' as string +} diff --git a/contrib/awk/test/convfmt.ok b/contrib/awk/test/convfmt.ok new file mode 100644 index 0000000..a7b66f7 --- /dev/null +++ b/contrib/awk/test/convfmt.ok @@ -0,0 +1,3 @@ +a = 123.46 +a = 123.456 +a = 123.456 diff --git 
a/contrib/awk/test/defref.awk b/contrib/awk/test/defref.awk new file mode 100644 index 0000000..b4e8f10 --- /dev/null +++ b/contrib/awk/test/defref.awk @@ -0,0 +1 @@ +BEGIN { foo() } diff --git a/contrib/awk/test/defref.ok b/contrib/awk/test/defref.ok new file mode 100644 index 0000000..f833c96 --- /dev/null +++ b/contrib/awk/test/defref.ok @@ -0,0 +1,2 @@ +gawk: defref.awk:2: warning: function `foo' called but never defined +gawk: defref.awk:1: fatal: function `foo' not defined diff --git a/contrib/awk/test/delarprm.awk b/contrib/awk/test/delarprm.awk new file mode 100644 index 0000000..d59de31 --- /dev/null +++ b/contrib/awk/test/delarprm.awk @@ -0,0 +1,50 @@ +# From dragon!unagi.cis.upenn.edu!sjanet Tue Mar 25 17:12:20 1997 +# Return-Path: +# Received: by skeeve.atl.ga.us (/\==/\ Smail3.1.22.1 #22.1) +# id ; Tue, 25 Mar 97 17:12 EST +# Received: by vecnet.com (DECUS UUCP /2.0/2.0/2.0/); +# Tue, 25 Mar 97 16:58:36 EDT +# Received: from gnu-life.ai.mit.edu by antaries.vec.net (MX V4.2 VAX) with SMTP; +# Tue, 25 Mar 1997 16:58:26 EST +# Received: from linc.cis.upenn.edu by gnu-life.ai.mit.edu (8.8.5/8.6.12GNU) with +# ESMTP id QAA24350 for ; Tue, 25 Mar +# 1997 16:56:59 -0500 (EST) +# Received: from unagi.cis.upenn.edu (UNAGI.CIS.UPENN.EDU [158.130.8.153]) by +# linc.cis.upenn.edu (8.8.5/8.8.5) with ESMTP id QAA09424; Tue, 25 Mar +# 1997 16:56:54 -0500 (EST) +# Received: (from sjanet@localhost) by unagi.cis.upenn.edu (8.8.5/8.8.5) id +# QAA03969; Tue, 25 Mar 1997 16:56:50 -0500 (EST) +# Date: Tue, 25 Mar 1997 16:56:50 -0500 (EST) +# From: Stan Janet +# Message-ID: <199703252156.QAA03969@unagi.cis.upenn.edu> +# To: bug-gnu-utils@prep.ai.mit.edu +# CC: arnold@gnu.ai.mit.edu +# Subject: GNU awk 3.0.2 bug: fatal error deleting local array inside function +# Status: ORf +# +# Version: GNU Awk 3.0.2 +# Platforms: SunOS 4.1.1 (compiled with Sun cc) +# IRIX 5.3 (compiled with SGI cc) +# Problem: Deleting local array inside function causes fatal internal error (and +# core 
dump. The error does not occur when the variable "x", unused in +# the example, is removed or when the function is declared foo(x,p). +# When the function is declared foo(p,x), adding a dummy line that uses +# "x", e.g. "x=1" does not prevent the error. If "p" is not deleted, +# there is no error. If "p[1]" is used to delete the lone element, there +# is no error. +# +# ==== The program x.gawk ==== + +function foo(p,x) { + p[1]="bar" + delete p + return 0 +} + +BEGIN { + foo() +} + +# ==== The output for "gawk -f x.gawk" (SunOS) ==== +# +# gawk: x.gawk:4: fatal error: internal error diff --git a/contrib/awk/test/delarprm.ok b/contrib/awk/test/delarprm.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/dynlj.awk b/contrib/awk/test/dynlj.awk new file mode 100644 index 0000000..ec6851b --- /dev/null +++ b/contrib/awk/test/dynlj.awk @@ -0,0 +1 @@ +BEGIN { printf "%*sworld\n", -20, "hello" } diff --git a/contrib/awk/test/dynlj.ok b/contrib/awk/test/dynlj.ok new file mode 100644 index 0000000..c8f3fe9 --- /dev/null +++ b/contrib/awk/test/dynlj.ok @@ -0,0 +1 @@ +hello world diff --git a/contrib/awk/test/eofsplit.awk b/contrib/awk/test/eofsplit.awk new file mode 100644 index 0000000..22042b4 --- /dev/null +++ b/contrib/awk/test/eofsplit.awk @@ -0,0 +1,68 @@ +# Date: Sat, 30 Mar 1996 12:47:17 -0800 (PST) +# From: Charles Howes +# To: bug-gnu-utils@prep.ai.mit.edu, arnold@gnu.ai.mit.edu +# Subject: Bug in Gawk 3.0.0, sample code: +# +#!/usr/local/bin/gawk -f +# +# Hello! 
This is a bug report from chowes@direct.ca +# +# uname -a +# SunOS hostname 5.5 Generic sun4m +# +# Gnu Awk (gawk) 3.0, patchlevel 0: +BEGIN{ +FS=":" +while ((getline < "/etc/passwd") > 0) { + r=$3 + z=0 + n[0]=1 + } +FS=" " +} +#gawk: fp.new:16: fatal error: internal error +#Abort + +# #!/usr/local/bin/gawk -f +# # Gnu Awk (gawk) 2.15, patchlevel 6 +# +# BEGIN{ +# f="/etc/passwd" +# while (getline < f) n[0]=1 +# FS=" " +# } +# #gawk: /staff/chowes/bin/fp:7: fatal error: internal error +# #Abort + +# These examples are not perfect coding style because I took a real +# piece of code and tried to strip away anything that didn't make the error +# message go away. +# +# The interesting part of the 'truss' is: +# +# fstat(3, 0xEFFFF278) = 0 +# lseek(3, 0, SEEK_SET) = 0 +# read(3, " r o o t : x : 0 : 1 : S".., 2291) = 2291 +# brk(0x00050020) = 0 +# brk(0x00052020) = 0 +# read(3, 0x0004F4B8, 2291) = 0 +# close(3) = 0 +# Incurred fault #6, FLTBOUNDS %pc = 0x0001B810 +# siginfo: SIGSEGV SEGV_MAPERR addr=0x00053000 +# Received signal #11, SIGSEGV [caught] +# siginfo: SIGSEGV SEGV_MAPERR addr=0x00053000 +# write(2, " g a w k", 4) = 4 +# write(2, " : ", 2) = 2 +# +# -- +# Charles Howes -- chowes@direct.ca Voice: (604) 691-1607 +# System Administrator Fax: (604) 691-1605 +# Internet Direct - 1050 - 555 West Hastings St - Vancouver, BC V6B 4N6 +# +# A sysadmin's life is a sorry one. The only advantage he has over Emergency +# Room doctors is that malpractice suits are rare. On the other hand, ER +# doctors never have to deal with patients installing new versions of their +# own innards! -Michael O'Brien +# +# "I think I know what may have gone wrong in the original s/w. +# It's a bug in the way it was written." 
- Vagueness**n diff --git a/contrib/awk/test/eofsplit.ok b/contrib/awk/test/eofsplit.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/fflush.ok b/contrib/awk/test/fflush.ok new file mode 100644 index 0000000..4cf0df6 --- /dev/null +++ b/contrib/awk/test/fflush.ok @@ -0,0 +1,16 @@ +1st +2nd +1st +2nd +1st +2nd +1st +2nd +1st +2nd +1st +2nd +1st +2nd +1st +2nd diff --git a/contrib/awk/test/fflush.sh b/contrib/awk/test/fflush.sh new file mode 100755 index 0000000..42d624c --- /dev/null +++ b/contrib/awk/test/fflush.sh @@ -0,0 +1,16 @@ +#! /bin/sh +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat"}' + +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat"}'|cat + +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");close("/dev/stdout");print "2nd"|"cat"}'|cat + +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat + +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat + +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat + +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"sort"}'|cat + +../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"sort";close("sort")}'|cat diff --git a/contrib/awk/test/fieldwdth.ok b/contrib/awk/test/fieldwdth.ok new file mode 100644 index 0000000..51b4008 --- /dev/null +++ b/contrib/awk/test/fieldwdth.ok @@ -0,0 +1 @@ +345 diff --git a/contrib/awk/test/fldchg.awk b/contrib/awk/test/fldchg.awk new file mode 100644 index 0000000..a8018f7 --- /dev/null +++ b/contrib/awk/test/fldchg.awk @@ -0,0 +1,8 @@ +{ +# print "0:", $0 + gsub("aa", "+") + print "1:", $0 + $3 = "<" $3 ">" + print "2:", $0 + print "2a:" "%" $1 "%" $2 "%" $3 "%" $4 "%" $5 +} diff --git a/contrib/awk/test/fldchg.in b/contrib/awk/test/fldchg.in new file mode 100644 index 0000000..f500c36 --- /dev/null +++ b/contrib/awk/test/fldchg.in @@ -0,0 +1 @@ +aa aab c d e f diff --git a/contrib/awk/test/fldchg.ok 
b/contrib/awk/test/fldchg.ok new file mode 100644 index 0000000..cc5032a --- /dev/null +++ b/contrib/awk/test/fldchg.ok @@ -0,0 +1,3 @@ +1: + +b c d e f +2: + +b d e f +2a:%+%+b%%d%e diff --git a/contrib/awk/test/fldchgnf.awk b/contrib/awk/test/fldchgnf.awk new file mode 100644 index 0000000..fbb8f11 --- /dev/null +++ b/contrib/awk/test/fldchgnf.awk @@ -0,0 +1 @@ +{ OFS = ":"; $2 = ""; print $0; print NF } diff --git a/contrib/awk/test/fldchgnf.in b/contrib/awk/test/fldchgnf.in new file mode 100644 index 0000000..8e13e46 --- /dev/null +++ b/contrib/awk/test/fldchgnf.in @@ -0,0 +1 @@ +a b c d diff --git a/contrib/awk/test/fldchgnf.ok b/contrib/awk/test/fldchgnf.ok new file mode 100644 index 0000000..10b38ed --- /dev/null +++ b/contrib/awk/test/fldchgnf.ok @@ -0,0 +1,2 @@ +a::c:d +4 diff --git a/contrib/awk/test/fnarray.awk b/contrib/awk/test/fnarray.awk new file mode 100644 index 0000000..92a18b9 --- /dev/null +++ b/contrib/awk/test/fnarray.awk @@ -0,0 +1,7 @@ +function foo(N) { + return 0 +} +BEGIN { + Num = foo[c] +} + diff --git a/contrib/awk/test/fnarray.ok b/contrib/awk/test/fnarray.ok new file mode 100644 index 0000000..94beacd --- /dev/null +++ b/contrib/awk/test/fnarray.ok @@ -0,0 +1 @@ +gawk: fnarray.awk:5: fatal: attempt to use function `foo' as array diff --git a/contrib/awk/test/fnarydel.awk b/contrib/awk/test/fnarydel.awk new file mode 100644 index 0000000..8a1264c --- /dev/null +++ b/contrib/awk/test/fnarydel.awk @@ -0,0 +1,60 @@ +#!/usr/local/bin/gawk -f +BEGIN { + process() +} + +function process(aa,a) { + delete aa +} + +BEGIN { + for (i = 1; i < 10; i++) + a[i] = i; + + print "first loop" + for (i in a) + print a[i] + + delete a + + print "second loop" + for (i in a) + print a[i] + + for (i = 1; i < 10; i++) + a[i] = i; + + print "third loop" + for (i in a) + print a[i] + + print "call func" + delit(a) + + print "fourth loop" + for (i in a) + print a[i] + + stressit() +} + +function delit(arr) +{ + delete arr +} + +function stressit( array, i) +{ + 
delete array + array[4] = 4 + array[5] = 5 + delete array[5] + print "You should just see: 4 4" + for (i in array) + print i, array[i] + delete array + print "You should see nothing between this line" + for (i in array) + print i, array[i] + print "And this one" +} diff --git a/contrib/awk/test/fnarydel.ok b/contrib/awk/test/fnarydel.ok new file mode 100644 index 0000000..7f3e453 --- /dev/null +++ b/contrib/awk/test/fnarydel.ok @@ -0,0 +1,27 @@ +first loop +4 +5 +6 +7 +8 +9 +1 +2 +3 +second loop +third loop +4 +5 +6 +7 +8 +9 +1 +2 +3 +call func +fourth loop +You should just see: 4 4 +4 4 +You should see nothing between this line +And this one diff --git a/contrib/awk/test/fsbs.in b/contrib/awk/test/fsbs.in new file mode 100644 index 0000000..0a102c3 --- /dev/null +++ b/contrib/awk/test/fsbs.in @@ -0,0 +1 @@ +1\2 diff --git a/contrib/awk/test/fsbs.ok b/contrib/awk/test/fsbs.ok new file mode 100644 index 0000000..8d04f96 --- /dev/null +++ b/contrib/awk/test/fsbs.ok @@ -0,0 +1 @@ +1 2 diff --git a/contrib/awk/test/fsrs.awk b/contrib/awk/test/fsrs.awk new file mode 100644 index 0000000..a001489 --- /dev/null +++ b/contrib/awk/test/fsrs.awk @@ -0,0 +1,8 @@ +BEGIN { + RS=""; FS="\n"; + ORS=""; OFS="\n"; + } +{ + split ($2,f," ") + print $0; +} diff --git a/contrib/awk/test/fsrs.in b/contrib/awk/test/fsrs.in new file mode 100644 index 0000000..4b49d81 --- /dev/null +++ b/contrib/awk/test/fsrs.in @@ -0,0 +1,7 @@ +a b +c d +e f + +1 2 +3 4 +5 6 diff --git a/contrib/awk/test/fsrs.ok b/contrib/awk/test/fsrs.ok new file mode 100644 index 0000000..7dafd65 --- /dev/null +++ b/contrib/awk/test/fsrs.ok @@ -0,0 +1,5 @@ +a b +c d +e f1 2 +3 4 +5 6 \ No newline at end of file diff --git a/contrib/awk/test/fstabplus.awk b/contrib/awk/test/fstabplus.awk new file mode 100644 index 0000000..748a44f --- /dev/null +++ b/contrib/awk/test/fstabplus.awk @@ -0,0 +1,2 @@ +BEGIN { FS = "\t+" } + { print $1, $2 } diff --git a/contrib/awk/test/fstabplus.ok b/contrib/awk/test/fstabplus.ok new file 
mode 100644 index 0000000..8d04f96 --- /dev/null +++ b/contrib/awk/test/fstabplus.ok @@ -0,0 +1 @@ +1 2 diff --git a/contrib/awk/test/funstack.awk b/contrib/awk/test/funstack.awk new file mode 100644 index 0000000..ab85b45 --- /dev/null +++ b/contrib/awk/test/funstack.awk @@ -0,0 +1,977 @@ +### ==================================================================== +### @Awk-file{ +### author = "Nelson H. F. Beebe", +### version = "1.00", +### date = "09 October 1996", +### time = "15:57:06 MDT", +### filename = "journal-toc.awk", +### address = "Center for Scientific Computing +### Department of Mathematics +### University of Utah +### Salt Lake City, UT 84112 +### USA", +### telephone = "+1 801 581 5254", +### FAX = "+1 801 581 4148", +### URL = "http://www.math.utah.edu/~beebe", +### checksum = "25092 977 3357 26493", +### email = "beebe@math.utah.edu (Internet)", +### codetable = "ISO/ASCII", +### keywords = "BibTeX, bibliography, HTML, journal table of +### contents", +### supported = "yes", +### docstring = "Create a journal cover table of contents from +### Article{...} entries in a journal BibTeX +### .bib file for checking the bibliography +### database against the actual journal covers. +### The output can be either plain text, or HTML. +### +### Usage: +### bibclean -max-width 0 BibTeX-file(s) | \ +### bibsort -byvolume | \ +### awk -f journal-toc.awk \ +### [-v HTML=nnn] [-v INDENT=nnn] \ +### [-v BIBFILEURL=url] >foo.toc +### +### or if the bibliography is already sorted +### by volume, +### +### bibclean -max-width 0 BibTeX-file(s) | \ +### awk -f journal-toc.awk \ +### [-v HTML=nnn] [-v INDENT=nnn] \ +### [-v BIBFILEURL=url] >foo.toc +### +### A non-zero value of the command-line option, +### HTML=nnn, results in HTML output instead of +### the default plain ASCII text (corresponding +### to HTML=0). The +### +### The INDENT=nnn command-line option specifies +### the number of blanks to indent each logical +### level of HTML. The default is INDENT=4. 
+### INDENT=0 suppresses indentation. The INDENT +### option has no effect when the default HTML=0 +### (plain text output) option is in effect. +### +### When HTML output is selected, the +### BIBFILEURL=url command-line option provides a +### way to request hypertext links from table of +### contents page numbers to the complete BibTeX +### entry for the article. These links are +### created by appending a sharp (#) and the +### citation label to the BIBFILEURL value, which +### conforms with the practice of +### bibtex-to-html.awk. +### +### The HTML output form may be useful as a more +### compact representation of journal article +### bibliography data than the original BibTeX +### file provides. Of course, the +### table-of-contents format provides less +### information, and is considerably more +### troublesome for a computer program to parse. +### +### When URL key values are provided, they will +### be used to create hypertext links around +### article titles. This supports journals that +### provide article contents on the World-Wide +### Web. +### +### For parsing simplicity, this program requires +### that BibTeX +### +### key = "value" +### +### and +### +### @String{name = "value"} +### +### specifications be entirely contained on +### single lines, which is readily provided by +### the `bibclean -max-width 0' filter. It also +### requires that bibliography entries begin and +### end at the start of a line, and that +### quotation marks, rather than balanced braces, +### delimit string values. This is a +### conventional format that again can be +### guaranteed by bibclean. +### +### This program requires `new' awk, as described +### in the book +### +### Alfred V. Aho, Brian W. Kernighan, and +### Peter J. Weinberger, +### ``The AWK Programming Language'', +### Addison-Wesley (1988), ISBN +### 0-201-07981-X, +### +### such as provided by programs named (GNU) +### gawk, nawk, and recent AT&T awk. 
+### +### The checksum field above contains a CRC-16 +### checksum as the first value, followed by the +### equivalent of the standard UNIX wc (word +### count) utility output of lines, words, and +### characters. This is produced by Robert +### Solovay's checksum utility.", +### } +### ==================================================================== + +BEGIN { initialize() } + +/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *{/ { do_String(); next } + +/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/ { next } + +/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/ { do_Article(); next } + +/^ *@/ { do_Other(); next } + +/^ *author *= *\"/ { do_author(); next } + +/^ *journal *= */ { do_journal(); next } + +/^ *volume *= *\"/ { do_volume(); next } + +/^ *number *= *\"/ { do_number(); next } + +/^ *year *= *\"/ { do_year(); next } + +/^ *month *= */ { do_month(); next } + +/^ *title *= *\"/ { do_title(); next } + +/^ *pages *= *\"/ { do_pages(); next } + +/^ *URL *= *\"/ { do_URL(); next } + +/^ *} *$/ { if (In_Article) do_end_entry(); next } + +END { terminate() } + + +######################################################################## +# NB: The programming conventions for variables in this program are: # +# UPPERCASE global constants and user options # +# Initialuppercase global variables # +# lowercase local variables # +# Any deviation is an error! 
# +######################################################################## + + +function do_Article() +{ + In_Article = 1 + + Citation_label = $0 + sub(/^[^\{]*{/,"",Citation_label) + sub(/ *, *$/,"",Citation_label) + + Author = "" + Title = "" + Journal = "" + Volume = "" + Number = "" + Month = "" + Year = "" + Pages = "" + Url = "" +} + + +function do_author() +{ + Author = TeX_to_HTML(get_value($0)) +} + + +function do_end_entry( k,n,parts) +{ + n = split(Author,parts," and ") + if (Last_number != Number) + do_new_issue() + for (k = 1; k < n; ++k) + print_toc_line(parts[k] " and", "", "") + Title_prefix = html_begin_title() + Title_suffix = html_end_title() + if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line + print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages()) + else # need to split long title over multiple lines + do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages()) +} + + +function do_journal() +{ + if ($0 ~ /[=] *"/) # have journal = "quoted journal name", + Journal = get_value($0) + else # have journal = journal-abbreviation, + { + Journal = get_abbrev($0) + if (Journal in String) # replace abbrev by its expansion + Journal = String[Journal] + } + gsub(/\\-/,"",Journal) # remove discretionary hyphens +} + + +function do_long_title(author,title,pages, last_title,n) +{ + title = trim(title) # discard leading and trailing space + while (length(title) > 0) + { + n = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS) + last_title = substr(title,1,n) + title = substr(title,n+1) + sub(/^ +/,"",title) # discard any leading space + print_toc_line(author, last_title, (length(title) == 0) ? pages : "") + author = "" + } +} + + +function do_month( k,n,parts) +{ + Month = ($0 ~ /[=] *"/) ? 
get_value($0) : get_abbrev($0) + gsub(/[\"]/,"",Month) + gsub(/ *# *\\slash *# */," / ",Month) + gsub(/ *# *-+ *# */," / ",Month) + n = split(Month,parts," */ *") + Month = "" + for (k = 1; k <= n; ++k) + Month = Month ((k > 1) ? " / " : "") \ + ((parts[k] in Month_expansion) ? Month_expansion[parts[k]] : parts[k]) +} + + +function do_new_issue() +{ + Last_number = Number + if (HTML) + { + if (Last_volume != Volume) + { + Last_volume = Volume + print_line(prefix(2) "
") + } + html_end_toc() + html_begin_issue() + print_line(prefix(2) Journal "
") + } + else + { + print_line("") + print_line(Journal) + } + + print_line(strip_html(vol_no_month_year())) + + if (HTML) + { + html_end_issue() + html_toc_entry() + html_begin_toc() + } + else + print_line("") +} + + +function do_number() +{ + Number = get_value($0) +} + + +function do_Other() +{ + In_Article = 0 +} + + +function do_pages() +{ + Pages = get_value($0) + sub(/--[?][?]/,"",Pages) +} + + +function do_String() +{ + sub(/^[^\{]*\{/,"",$0) # discard up to and including open brace + sub(/\} *$/,"",$0) # discard from optional whitespace and trailing brace to end of line + String[get_key($0)] = get_value($0) +} + + +function do_title() +{ + Title = TeX_to_HTML(get_value($0)) +} + + +function do_URL( parts) +{ + Url = get_value($0) + split(Url,parts,"[,;]") # in case we have multiple URLs + Url = trim(parts[1]) +} + + +function do_volume() +{ + Volume = get_value($0) +} + + +function do_year() +{ + Year = get_value($0) +} + + +function get_abbrev(s) +{ # return abbrev from ``key = abbrev,'' + sub(/^[^=]*= */,"",s) # discard text up to start of non-blank value + sub(/ *,? *$/,"",s) # discard trailing optional whitspace, quote, + # optional comma, and optional space + return (s) +} + + +function get_key(s) +{ # return kay from ``key = "value",'' + sub(/^ */,"",s) # discard leading space + sub(/ *=.*$/,"",s) # discard everthing after key + + return (s) +} + + +function get_value(s) +{ # return value from ``key = "value",'' + sub(/^[^\"]*\" */,"",s) # discard text up to start of non-blank value + sub(/ *\",? *$/,"",s) # discard trailing optional whitspace, quote, + # optional comma, and optional space + return (s) +} + + +function html_accents(s) +{ + if (index(s,"\\") > 0) # important optimization + { + # Convert common lower-case accented letters according to the + # table on p. 169 of in Peter Flynn's ``The World Wide Web + # Handbook'', International Thomson Computer Press, 1995, ISBN + # 1-85032-205-8. 
The official table of ISO Latin 1 SGML + # entities used in HTML can be found in the file + # /usr/local/lib/html-check/lib/ISOlat1.sgml (your path + # may differ). + + gsub(/{\\\a}/, "\\à", s) + gsub(/{\\'a}/, "\\á", s) + gsub(/{\\[\^]a}/,"\\â", s) + gsub(/{\\~a}/, "\\ã", s) + gsub(/{\\\"a}/, "\\ä", s) + gsub(/{\\aa}/, "\\å", s) + gsub(/{\\ae}/, "\\æ", s) + + gsub(/{\\c{c}}/,"\\ç", s) + + gsub(/{\\\e}/, "\\è", s) + gsub(/{\\'e}/, "\\é", s) + gsub(/{\\[\^]e}/,"\\ê", s) + gsub(/{\\\"e}/, "\\ë", s) + + gsub(/{\\\i}/, "\\ì", s) + gsub(/{\\'i}/, "\\í", s) + gsub(/{\\[\^]i}/,"\\î", s) + gsub(/{\\\"i}/, "\\ï", s) + + # ignore eth and thorn + + gsub(/{\\~n}/, "\\ñ", s) + + gsub(/{\\\o}/, "\\ò", s) + gsub(/{\\'o}/, "\\ó", s) + gsub(/{\\[\^]o}/, "\\ô", s) + gsub(/{\\~o}/, "\\õ", s) + gsub(/{\\\"o}/, "\\ö", s) + gsub(/{\\o}/, "\\ø", s) + + gsub(/{\\\u}/, "\\ù", s) + gsub(/{\\'u}/, "\\ú", s) + gsub(/{\\[\^]u}/,"\\û", s) + gsub(/{\\\"u}/, "\\ü", s) + + gsub(/{\\'y}/, "\\ý", s) + gsub(/{\\\"y}/, "\\ÿ", s) + + # Now do the same for upper-case accents + + gsub(/{\\\A}/, "\\À", s) + gsub(/{\\'A}/, "\\Á", s) + gsub(/{\\[\^]A}/, "\\Â", s) + gsub(/{\\~A}/, "\\Ã", s) + gsub(/{\\\"A}/, "\\Ä", s) + gsub(/{\\AA}/, "\\Å", s) + gsub(/{\\AE}/, "\\Æ", s) + + gsub(/{\\c{C}}/,"\\Ç", s) + + gsub(/{\\\e}/, "\\È", s) + gsub(/{\\'E}/, "\\É", s) + gsub(/{\\[\^]E}/, "\\Ê", s) + gsub(/{\\\"E}/, "\\Ë", s) + + gsub(/{\\\I}/, "\\Ì", s) + gsub(/{\\'I}/, "\\Í", s) + gsub(/{\\[\^]I}/, "\\Î", s) + gsub(/{\\\"I}/, "\\Ï", s) + + # ignore eth and thorn + + gsub(/{\\~N}/, "\\Ñ", s) + + gsub(/{\\\O}/, "\\Ò", s) + gsub(/{\\'O}/, "\\Ó", s) + gsub(/{\\[\^]O}/, "\\Ô", s) + gsub(/{\\~O}/, "\\Õ", s) + gsub(/{\\\"O}/, "\\Ö", s) + gsub(/{\\O}/, "\\Ø", s) + + gsub(/{\\\U}/, "\\Ù", s) + gsub(/{\\'U}/, "\\Ú", s) + gsub(/{\\[\^]U}/, "\\Û", s) + gsub(/{\\\"U}/, "\\Ü", s) + + gsub(/{\\'Y}/, "\\Ý", s) + + gsub(/{\\ss}/, "\\ß", s) + + # Others not mentioned in Flynn's book + gsub(/{\\'\\i}/,"\\í", s) + gsub(/{\\'\\j}/,"j", s) + 
} + return (s) +} + + +function html_begin_issue() +{ + print_line("") + print_line(prefix(2) "
") + print_line("") + print_line(prefix(2) "

") + print_line(prefix(3) "") +} + + +function html_begin_pages() +{ + return ((HTML && (BIBFILEURL != "")) ? ("") : "") +} + + +function html_begin_pre() +{ + In_PRE = 1 + print_line("
")
+}
+
+
+function html_begin_title()
+{
+	return ((HTML && (Url != "")) ? ("") : "")
+}
+
+
+function html_begin_toc()
+{
+	html_end_toc()
+	html_begin_pre()
+}
+
+
+function html_body( k)
+{
+	for (k = 1; k <= BodyLines; ++k)
+		print Body[k]
+}
+
+function html_breakpoint(title,maxlength, break_after,k)
+{
+	# Return the largest character position in title AFTER which we
+	# can break the title across lines, without exceeding maxlength
+	# visible characters.
+	if (html_length(title) > maxlength)	# then need to split title across lines
+	{
+		# In the presence of HTML markup, the initialization of
+		# k here is complicated, because we need to advance it
+		# until html_length(title) is at least maxlength,
+		# without invoking the expensive html_length() function
+		# too frequently.  The need to split the title makes the
+		# alternative of delayed insertion of HTML markup much
+		# more complicated.
+		break_after = 0
+		for (k = min(maxlength,length(title)); k < length(title); ++k)
+		{
+			if (substr(title,k+1,1) == " ")
+			{		# could break after position k
+				if (html_length(substr(title,1,k)) <= maxlength)
+					break_after = k
+				else	# advanced too far, retreat back to last break_after
+					break
+			}
+		}
+		if (break_after == 0)		# no breakpoint found by forward scan
+		{				# so switch to backward scan
+			for (k = min(maxlength,length(title)) - 1; \
+				(k > 0) && (substr(title,k+1,1) != " "); --k)
+				;		# find space at which to break title
+			if (k < 1)		# no break point found
+				k = length(title) # so must print entire string
+		}
+		else
+			k = break_after
+	}
+	else					# title fits on one line
+		k = length(title)
+	return (k)
+}
+
+
+
+function html_end_issue()
+{
+	print_line(prefix(3) "")
+	print_line(prefix(2) "

") +} + + +function html_end_pages() +{ + return ((HTML && (BIBFILEURL != "")) ? "" : "") +} + + +function html_end_pre() +{ + if (In_PRE) + { + print_line("") + In_PRE = 0 + } +} + + +function html_end_title() +{ + return ((HTML && (Url != "")) ? "" : "") +} + + +function html_end_toc() +{ + html_end_pre() +} + + +function html_fonts(s, arg,control_word,k,level,n,open_brace) +{ + open_brace = index(s,"{") + if (open_brace > 0) # important optimization + { + level = 1 + for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k) + { + if (substr(s,k,1) == "{") + level++ + else if (substr(s,k,1) == "}") + level-- + } + + # {...} is now found at open_brace ... (k-1) + for (control_word in Font_decl_map) # look for {\xxx ...} + { + if (substr(s,open_brace+1,length(control_word)+1) ~ \ + ("\\" control_word "[^A-Za-z]")) + { + n = open_brace + 1 + length(control_word) + arg = trim(substr(s,n,k - n)) + if (Font_decl_map[control_word] == "toupper") # arg -> ARG + arg = toupper(arg) + else if (Font_decl_map[control_word] != "") # arg -> arg + arg = "<" Font_decl_map[control_word] ">" arg "" + return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k))) + } + } + for (control_word in Font_cmd_map) # look for \xxx{...} + { + if (substr(s,open_brace - length(control_word),length(control_word)) ~ \ + ("\\" control_word)) + { + n = open_brace + 1 + arg = trim(substr(s,n,k - n)) + if (Font_cmd_map[control_word] == "toupper") # arg -> ARG + arg = toupper(arg) + else if (Font_cmd_map[control_word] != "") # arg -> arg + arg = "<" Font_cmd_map[control_word] ">" arg "" + n = open_brace - length(control_word) - 1 + return (substr(s,1,n) arg html_fonts(substr(s,k))) + } + } + } + return (s) +} + + +function html_header() +{ + USER = ENVIRON["USER"] + if (USER == "") + USER = ENVIRON["LOGNAME"] + if (USER == "") + USER = "????" 
+ "hostname" | getline HOSTNAME + "date" | getline DATE + ("ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'") | getline PERSONAL_NAME + if (PERSONAL_NAME == "") + ("grep '^" USER ":' /etc/passwd | awk -F: '{print $5}'") | getline PERSONAL_NAME + + + print "" + print "" + print "" + print "" + print "" + print "" + print "" + print "" + print "" + print prefix(1) "" + print prefix(2) "" + print prefix(3) Journal + print prefix(2) "" + print prefix(2) "" + print prefix(1) "" + print "" + print prefix(1) "" +} + + +function html_label( label) +{ + label = Volume "(" Number "):" Month ":" Year + gsub(/[^A-Za-z0-9():,;.\/\-]/,"",label) + return (label) +} + + +function html_length(s) +{ # Return visible length of s, ignoring any HTML markup + if (HTML) + { + gsub(/<\/?[^>]*>/,"",s) # remove SGML tags + gsub(/&[A-Za-z0-9]+;/,"",s) # remove SGML entities + } + return (length(s)) +} + + +function html_toc() +{ + print prefix(2) "

" + print prefix(3) "Table of contents for issues of " Journal + print prefix(2) "

" + print HTML_TOC +} + + +function html_toc_entry() +{ + HTML_TOC = HTML_TOC " " + HTML_TOC = HTML_TOC vol_no_month_year() + HTML_TOC = HTML_TOC "
" "\n" +} + + +function html_trailer() +{ + html_end_pre() + print prefix(1) "" + print "" +} + + +function initialize() +{ + # NB: Update these when the program changes + VERSION_DATE = "[09-Oct-1996]" + VERSION_NUMBER = "1.00" + + HTML = (HTML == "") ? 0 : (0 + HTML) + + if (INDENT == "") + INDENT = 4 + + if (HTML == 0) + INDENT = 0 # indentation suppressed in ASCII mode + + LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ." + + MAX_TITLE_CHARS = 36 # 36 produces a 79-char output line when there is + # just an initial page number. If this is + # increased, the LEADERS string may need to be + # lengthened. + + MIN_LEADERS = 4 # Minimum number of characters from LEADERS + # required when leaders are used. The total + # number of characters that can appear in a + # title line is MAX_TITLE_CHARS + MIN_LEADERS. + # Leaders are omitted when the title length is + # between MAX_TITLE_CHARS and this sum. + + MIN_LEADERS_SPACE = " " # must be at least MIN_LEADERS characters long + + Month_expansion["jan"] = "January" + Month_expansion["feb"] = "February" + Month_expansion["mar"] = "March" + Month_expansion["apr"] = "April" + Month_expansion["may"] = "May" + Month_expansion["jun"] = "June" + Month_expansion["jul"] = "July" + Month_expansion["aug"] = "August" + Month_expansion["sep"] = "September" + Month_expansion["oct"] = "October" + Month_expansion["nov"] = "November" + Month_expansion["dec"] = "December" + + Font_cmd_map["\\emph"] = "EM" + Font_cmd_map["\\textbf"] = "B" + Font_cmd_map["\\textit"] = "I" + Font_cmd_map["\\textmd"] = "" + Font_cmd_map["\\textrm"] = "" + Font_cmd_map["\\textsc"] = "toupper" + Font_cmd_map["\\textsl"] = "I" + Font_cmd_map["\\texttt"] = "t" + Font_cmd_map["\\textup"] = "" + + Font_decl_map["\\bf"] = "B" + Font_decl_map["\\em"] = "EM" + Font_decl_map["\\it"] = "I" + Font_decl_map["\\rm"] = "" + Font_decl_map["\\sc"] = "toupper" + Font_decl_map["\\sf"] = "" + Font_decl_map["\\tt"] = "TT" + 
Font_decl_map["\\itshape"] = "I" + Font_decl_map["\\upshape"] = "" + Font_decl_map["\\slshape"] = "I" + Font_decl_map["\\scshape"] = "toupper" + Font_decl_map["\\mdseries"] = "" + Font_decl_map["\\bfseries"] = "B" + Font_decl_map["\\rmfamily"] = "" + Font_decl_map["\\sffamily"] = "" + Font_decl_map["\\ttfamily"] = "TT" +} + +function min(a,b) +{ + return (a < b) ? a : b +} + + +function prefix(level) +{ + # Return a prefix of up to 60 blanks + + if (In_PRE) + return ("") + else + return (substr(" ", \ + 1, INDENT * level)) +} + + +function print_line(line) +{ + if (HTML) # must buffer in memory so that we can accumulate TOC + Body[++BodyLines] = line + else + print line +} + + +function print_toc_line(author,title,pages, extra,leaders,n,t) +{ + # When we have a multiline title, the hypertext link goes only + # on the first line. A multiline hypertext link looks awful + # because of long underlines under the leading indentation. + + if (pages == "") # then no leaders needed in title lines other than last one + t = sprintf("%31s %s%s%s", author, Title_prefix, title, Title_suffix) + else # last title line, with page number + { + n = html_length(title) # potentially expensive + extra = n % 2 # extra space for aligned leader dots + if (n <= MAX_TITLE_CHARS) # then need leaders + leaders = substr(LEADERS, 1, MAX_TITLE_CHARS + MIN_LEADERS - extra - \ + min(MAX_TITLE_CHARS,n)) + else # title (almost) fills line, so no leaders + leaders = substr(MIN_LEADERS_SPACE,1, \ + (MAX_TITLE_CHARS + MIN_LEADERS - extra - n)) + t = sprintf("%31s %s%s%s%s%s %4s", \ + author, Title_prefix, title, Title_suffix, \ + (extra ? " " : ""), leaders, pages) + } + + Title_prefix = "" # forget any hypertext + Title_suffix = "" # link material + + # Efficency note: an earlier version accumulated the body in a + # single scalar like this: "Body = Body t". Profiling revealed + # this statement as the major hot spot, and the change to array + # storage made the program more than twice as fast. 
This + # suggests that awk might benefit from an optimization of + # "s = s t" that uses realloc() instead of malloc(). + if (HTML) + Body[++BodyLines] = t + else + print t +} + + +function protect_SGML_characters(s) +{ + gsub(/&/,"\\&",s) # NB: this one MUST be first + gsub(//,"\\>",s) + gsub(/\"/,"\\"",s) + return (s) +} + + +function strip_braces(s, k) +{ # strip non-backslashed braces from s and return the result + + return (strip_char(strip_char(s,"{"),"}")) +} + + +function strip_char(s,c, k) +{ # strip non-backslashed instances of c from s, and return the result + k = index(s,c) + if (k > 0) # then found the character + { + if (substr(s,k-1,1) != "\\") # then not backslashed char + s = substr(s,1,k-1) strip_char(substr(s,k+1),c) # so remove it (recursively) + else # preserve backslashed char + s = substr(s,1,k) strip_char(s,k+1,c) + } + return (s) +} + + +function strip_html(s) +{ + gsub(/<\/?[^>]*>/,"",s) + return (s) +} + + +function terminate() +{ + if (HTML) + { + html_end_pre() + + HTML = 0 # NB: stop line buffering + html_header() + html_toc() + html_body() + html_trailer() + } +} + + +function TeX_to_HTML(s, k,n,parts) +{ + # First convert the four SGML reserved characters to SGML entities + if (HTML) + { + gsub(/>/, "\\>", s) + gsub(/ 1) ? "$" : "") \ + ((k % 2) ? 
strip_braces(TeX_to_HTML_nonmath(parts[k])) : \ + TeX_to_HTML_math(parts[k])) + + gsub(/[$][$][$]/,"$$",s) # restore display math + + return (s) +} + + +function TeX_to_HTML_math(s) +{ + # Mostly a dummy for now, but HTML 3 could support some math translation + + gsub(/\\&/,"\\&",s) # reduce TeX ampersands to SGML entities + + return (s) +} + + +function TeX_to_HTML_nonmath(s) +{ + if (index(s,"\\") > 0) # important optimization + { + gsub(/\\slash +/,"/",s) # replace TeX slashes with conventional ones + gsub(/ *\\emdash +/," --- ",s) # replace BibNet emdashes with conventional ones + gsub(/\\%/,"%",s) # reduce TeX percents to conventional ones + gsub(/\\[$]/,"$",s) # reduce TeX dollars to conventional ones + gsub(/\\#/,"#",s) # reduce TeX sharps to conventional ones + + if (HTML) # translate TeX markup to HTML + { + gsub(/\\&/,"\\&",s) # reduce TeX ampersands to SGML entities + s = html_accents(s) + s = html_fonts(s) + } + else # plain ASCII text output: discard all TeX markup + { + gsub(/\\\&/, "\\&", s) # reduce TeX ampersands to conventional ones + + gsub(/\\[a-z][a-z] +/,"",s) # remove TeX font changes + gsub(/\\[^A-Za-z]/,"",s) # remove remaining TeX control symbols + } + } + return (s) +} + + +function trim(s) +{ + gsub(/^[ \t]+/,"",s) + gsub(/[ \t]+$/,"",s) + return (s) +} + + +function vol_no_month_year() +{ + return ("Volume " wrap(Volume) ", Number " wrap(Number) ", " wrap(Month) ", " wrap(Year)) +} + + +function wrap(value) +{ + return (HTML ? ("" value "") : value) +} diff --git a/contrib/awk/test/funstack.in b/contrib/awk/test/funstack.in new file mode 100644 index 0000000..7a29a25 --- /dev/null +++ b/contrib/awk/test/funstack.in @@ -0,0 +1,206 @@ +%%% ==================================================================== +%%% BibTeX-file{ +%%% author = "Nelson H. F. 
Beebe", +%%% version = "2.09", +%%% date = "26 March 1997", +%%% time = "08:21:19 MST", +%%% filename = "cacm1970.bib", +%%% address = "Center for Scientific Computing +%%% Department of Mathematics +%%% University of Utah +%%% Salt Lake City, UT 84112 +%%% USA", +%%% telephone = "+1 801 581 5254", +%%% FAX = "+1 801 581 4148", +%%% checksum = "50673 40670 196033 1787829", +%%% email = "beebe at math.utah.edu (Internet)", +%%% codetable = "ISO/ASCII", +%%% keywords = "bibliography, CACM, Communications of the +%%% ACM", +%%% supported = "yes", +%%% docstring = "This is a bibliography of the journal +%%% Communications of the ACM, covering +%%% (incompletely) 1970 -- 1979. +%%% +%%% At version 2.09, the year coverage looked +%%% like this: +%%% +%%% 1961 ( 1) 1972 (168) 1983 ( 0) +%%% 1962 ( 1) 1973 (158) 1984 ( 0) +%%% 1963 ( 2) 1974 (127) 1985 ( 2) +%%% 1964 ( 2) 1975 (107) 1986 ( 0) +%%% 1965 ( 1) 1976 ( 97) 1987 ( 0) +%%% 1966 ( 2) 1977 (117) 1988 ( 0) +%%% 1967 ( 1) 1978 (118) 1989 ( 0) +%%% 1968 ( 1) 1979 ( 78) 1990 ( 2) +%%% 1969 ( 3) 1980 ( 1) 1991 ( 4) +%%% 1970 (157) 1981 ( 2) 1992 ( 1) +%%% 1971 (104) 1982 ( 1) +%%% +%%% Article: 1252 +%%% Book: 2 +%%% InProceedings: 1 +%%% Manual: 1 +%%% MastersThesis: 1 +%%% PhdThesis: 1 +%%% +%%% Total entries: 1258 +%%% +%%% The size of the original cacm.bib file +%%% covering 1958--1996 became too large (about +%%% 4000 entries) for BibTeX and TeX to handle, +%%% so at version 1.44, it was split into +%%% cacm1950.bib, cacm1960.bib, cacm1970.bib, +%%% cacm1980.bib, and cacm1990.bib, each covering +%%% the decade starting with the year embedded in +%%% the filename. Version numbers for these +%%% files begin at 2.00. +%%% +%%% Volumes from the 1990s average more than 200 +%%% articles yearly, so a complete bibliography +%%% for this journal could contain more than 6000 +%%% entries from 1958 to 2000. +%%% +%%% These bibliographies also include ACM +%%% Algorithms 1--492. 
For Algorithms 493--686, +%%% including Algorithm 568, published in ACM +%%% Transactions on Programming Languages and +%%% Systems (TOPLAS), see the companion +%%% bibliographies, toms.bib and toplas.bib. +%%% +%%% All published Remarks and Corrigenda are +%%% cross-referenced in both directions, so +%%% that citing a paper will automatically +%%% generate citations for those Remarks and +%%% Corrigenda. Cross-referenced entries are +%%% duplicated in cacm19*.bib and toms.bib, so +%%% that each is completely self-contained. +%%% +%%% Source code for ACM Algorithms from 380 +%%% onwards, with some omissions, is available +%%% via the Netlib service at +%%% http://netlib.ornl.gov/, and +%%% ftp://netlib.bell-labs.com/netlib/toms. +%%% +%%% There is a World Wide Web search facility +%%% for articles published in this journal from +%%% 1959 to 1979 at +%%% http://ciir.cs.umass.edu/cgi-bin/web_query_form/public/cacm2.1. +%%% +%%% The initial draft of entries for 1981 -- +%%% 1990 was extracted from the ACM Computing +%%% Archive CD ROM for the 1980s, with manual +%%% corrections and additions. Additions were +%%% then made from all of the bibliographies in +%%% the TeX User Group collection, from +%%% bibliographies in the author's personal +%%% files, from the Compendex database +%%% (1970--1979), from the IEEE INSPEC database +%%% (1970--1979), from tables of contents +%%% information at http://www.acm.org/pubs/cacm/, +%%% from Zentralblatt fur Mathematik Mathematics +%%% Abstracts at +%%% http://www.emis.de/cgi-bin/MATH/, from +%%% bibliographies at Internet host +%%% netlib.bell-labs.com, and from the computer +%%% science bibliography collection on +%%% ftp.ira.uka.de in /pub/bibliography to which +%%% many people of have contributed. The +%%% snapshot of this collection was taken on +%%% 5-May-1994, and it consists of 441 BibTeX +%%% files, 2,672,675 lines, 205,289 entries, and +%%% 6,375 String{} abbreviations, occupying +%%% 94.8MB of disk space. 
+%%% +%%% Numerous errors in the sources noted above +%%% have been corrected. Spelling has been +%%% verified with the UNIX spell and GNU ispell +%%% programs using the exception dictionary +%%% stored in the companion file with extension +%%% .sok. +%%% +%%% BibTeX citation tags are uniformly chosen +%%% as name:year:abbrev, where name is the +%%% family name of the first author or editor, +%%% year is a 4-digit number, and abbrev is a +%%% 3-letter condensation of important title +%%% words. Citation tags were automatically +%%% generated by software developed for the +%%% BibNet Project. +%%% +%%% In this bibliography, entries are sorted in +%%% publication order within each journal, +%%% using bibsort -byvolume. +%%% +%%% The checksum field above contains a CRC-16 +%%% checksum as the first value, followed by the +%%% equivalent of the standard UNIX wc (word +%%% count) utility output of lines, words, and +%%% characters. This is produced by Robert +%%% Solovay's checksum utility.", +%%% } +%%% ==================================================================== + +@Preamble{"\input bibnames.sty " # "\input path.sty " # "\def \TM {${}^{\sc TM}$} " # "\hyphenation{ al-pha-mer-ic Balz-er Blom-quist Bo-ta-fo-go Bran-din Brans-comb Bu-tera Chris-tina Christ-o-fi-des Col-lins Cor-dell data-base econ-omies Fletch-er + flow-chart flow-charts Fry-styk ge-dank-en Gar-fink-el Ge-ha-ni Glush-ko Goud-reau Gua-dan-go Hari-di Haw-thorn Hem-men-ding-er Hor-o-witz Hour-vitz Hirsch-berg Ike-da Ka-chi-tvi-chyan-u-kul Kat-ze-nel-son Kitz-miller Ko-ba-yashi Le-Me-tay-er Ken-ne-dy + Law-rence Mac-kay Mai-net-ti Mar-sa-glia Max-well Mer-ner Mo-ran-di Na-ray-an New-ell Nich-ols para-digm pat-ent-ed Phi-lo-kyp-rou Prep-a-ra-ta pseu-do-chain-ing QUIK-SCRIPT Rad-e-mach-er re-eval-u-a-tion re-wind Ros-witha Scheu-er-mann Schwach-heim + Schob-bens Schon-berg Sho-sha-ni Si-tha-ra-ma Skwa-rec-ki Streck-er Strin-gi-ni Tes-ler Te-zu-ka Teu-ho-la Till-quist Town-send Tsi-chri-tzis Tur-ski 
Vuille-min Wald-ing-er Za-bo-row-ski Za-mora }"} + +%======================================================================= +% Acknowledgement abbreviations: + +@String{ack-nhfb = "Nelson H. F. Beebe, Center for Scientific Computing, Department of Mathematics, University of Utah, Salt Lake City, UT 84112, USA, Tel: +1 801 581 5254, FAX: +1 801 581 4148, e-mail: \path|beebe@math.utah.edu|"} + +@String{ack-nj = "Norbert Juffa, 2445 Mission College Blvd. Santa Clara, CA 95054 USA email: \path=norbert@iit.com="} + +%======================================================================= +% Journal abbreviations: + +@String{j-CACM = "Communications of the ACM"} + +@String{j-COMP-SURV = "Computing Surveys"} + +@String{j-J-ACM = "Journal of the ACM"} + +@String{j-MANAGEMENT-SCIENCE = "Management Science"} + +@String{j-SIAM-J-COMPUT = "SIAM Journal of Computing"} + +@String{j-SPE = "Software --- Practice and Experience"} + +@String{j-TOMS = "ACM Transactions on Mathematical Software"} + +%======================================================================= +% Publisher abbreviations: + +@String{pub-ANSI = "American National Standards Institute"} + +@String{pub-ANSI:adr = "1430 Broadway, New York, NY 10018, USA"} + +@String{pub-AW = "Ad{\-d}i{\-s}on-Wes{\-l}ey"} + +@String{pub-AW:adr = "Reading, MA, USA"} + +@String{pub-SUCSLI = "Stanford University Center for the Study of Language and Information"} + +@String{pub-SUCSLI:adr = "Stanford, CA, USA"} + +@String{pub-SV = "Spring{\-}er-Ver{\-}lag"} + +@String{pub-SV:adr = "Berlin, Germany~/ Heidelberg, Germany~/ London, UK~/ etc."} +@MastersThesis{Dittmer:1976:IEP, + author = "Ingo Dittmer", + title = "{Implementation eines Einschrittcompilers f{\"u}r die Progammiersprache PASCAL auf der Rechenanlage IBM\slash 360 der Universit{\"a}t M{\"u}nster}. 
({English} title: Implementation of a One-Step Compiler for the Programming Language + {PASCAL} on the {IBM}\slash 360 of the {University of Muenster})", + type = "Diplomearbeit", + school = "Universit{\"a}t M{\"u}nster", + address = "M{\"u}nster, Germany", + pages = "??", + month = "??", + year = "1976", + bibdate = "Sat Feb 17 13:24:29 1996", + note = "Diplomearbeit M{\"u}nster 1976 und doert angegebene Literatur (English: Muenster diploma work 1976 and the literature cited therein). The hashing method was rediscovered fourteen years later by Pearson \cite{Pearson:1990:FHV}, and then + commented on by several authors \cite{Dittmer:1991:NFH,Savoy:1991:NFH,Litsios:1991:NFH,Pearson:1991:NFH}.", + acknowledgement = ack-nhfb, + xxnote = "Cannot find in Dissertation Abstracts, European.", +} diff --git a/contrib/awk/test/funstack.ok b/contrib/awk/test/funstack.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/gensub.awk b/contrib/awk/test/gensub.awk new file mode 100644 index 0000000..f91d84d --- /dev/null +++ b/contrib/awk/test/gensub.awk @@ -0,0 +1,7 @@ +BEGIN { a = "this is a test of gawk" + b = gensub(/(this).*(test).*(gawk)/, "3 = <\\3>, 2 = <\\2>, 1 = <\\1>", 1, a) + print b +} +NR == 1 { print gensub(/b/, "BB", 2) } +NR == 2 { print gensub(/c/, "CC", "global") } +END { print gensub(/foo/, "bar", 1, "DON'T PANIC") } diff --git a/contrib/awk/test/gensub.in b/contrib/awk/test/gensub.in new file mode 100644 index 0000000..96c9faf --- /dev/null +++ b/contrib/awk/test/gensub.in @@ -0,0 +1,2 @@ +a b c a b c a b c +a b c a b c a b c diff --git a/contrib/awk/test/gensub.ok b/contrib/awk/test/gensub.ok new file mode 100644 index 0000000..b9ea3de --- /dev/null +++ b/contrib/awk/test/gensub.ok @@ -0,0 +1,4 @@ +3 = , 2 = , 1 = +a b c a BB c a b c +a b CC a b CC a b CC +DON'T PANIC diff --git a/contrib/awk/test/getline.awk b/contrib/awk/test/getline.awk new file mode 100644 index 0000000..f4e413f --- /dev/null +++ b/contrib/awk/test/getline.awk @@ 
-0,0 +1 @@ +BEGIN { while( getline > 0) { print } } diff --git a/contrib/awk/test/getline.ok b/contrib/awk/test/getline.ok new file mode 100644 index 0000000..9b7f2b9 --- /dev/null +++ b/contrib/awk/test/getline.ok @@ -0,0 +1,2 @@ +BEGIN { while( getline > 0) { print } } +BEGIN { while( getline > 0) { print } } diff --git a/contrib/awk/test/getlnhd.awk b/contrib/awk/test/getlnhd.awk new file mode 100644 index 0000000..f0f801b --- /dev/null +++ b/contrib/awk/test/getlnhd.awk @@ -0,0 +1,10 @@ +BEGIN { pipe = "cat < 0) + print + + exit 0 +} diff --git a/contrib/awk/test/getlnhd.ok b/contrib/awk/test/getlnhd.ok new file mode 100644 index 0000000..d8cb453 --- /dev/null +++ b/contrib/awk/test/getlnhd.ok @@ -0,0 +1,2 @@ +select * from user + where Name = 'O\'Donell' diff --git a/contrib/awk/test/gnureops.awk b/contrib/awk/test/gnureops.awk new file mode 100644 index 0000000..15b9b84 --- /dev/null +++ b/contrib/awk/test/gnureops.awk @@ -0,0 +1,45 @@ +# test the gnu regexp ops + +BEGIN { + if ("a rat is here" ~ /\yrat/) print "test 1 ok (\\y)" + else print "test 1 failed (\\y)" + if ("a rat is here" ~ /rat\y/) print "test 2 ok (\\y)" + else print "test 2 failed (\\y)" + if ("what a brat" !~ /\yrat/) print "test 3 ok (\\y)" + else print "test 3 failed (\\y)" + + if ("in the crate" ~ /\Brat/) print "test 4 ok (\\B)" + else print "test 4 failed (\\B)" + if ("a rat" !~ /\Brat/) print "test 5 ok (\\B)" + else print "test 5 failed (\\B)" + + if ("a word" ~ /\/) print "test 8 ok (\\>)" + else print "test 8 failed (\\\\>)" + if ("wordy" !~ /word\>/) print "test 9 ok (\\>)" + else print "test 9 failed (\\>)" + + if ("a" ~ /\w/) print "test 10 ok (\\w)" + else print "test 10 failed (\\\\w)" + if ("+" !~ /\w/) print "test 11 ok (\\w)" + else print "test 11 failed (\\w)" + + if ("a" !~ /\W/) print "test 12 ok (\\W)" + else print "test 12 failed (\\W)" + if ("+" ~ /\W/) print "test 13 ok (\\W)" + else print "test 13 failed (\\W)" + + if ("a" ~ /\`a/) print "test 14 ok (\\`)" + else 
print "test 14 failed (\\`)" + if ("b" !~ /\`a/) print "test 15 ok (\\`)" + else print "test 15 failed (\\`)" + + if ("a" ~ /a\'/) print "test 16 ok (\\')" + else print "test 16 failed (\\')" + if ("b" !~ /a\'/) print "test 17 ok (\\')" + else print "test 17 failed (\\')" +} diff --git a/contrib/awk/test/gnureops.ok b/contrib/awk/test/gnureops.ok new file mode 100644 index 0000000..0fb5f50 --- /dev/null +++ b/contrib/awk/test/gnureops.ok @@ -0,0 +1,17 @@ +test 1 ok (\y) +test 2 ok (\y) +test 3 ok (\y) +test 4 ok (\B) +test 5 ok (\B) +test 6 ok (\<) +test 7 ok (\<) +test 8 ok (\>) +test 9 ok (\>) +test 10 ok (\w) +test 11 ok (\w) +test 12 ok (\W) +test 13 ok (\W) +test 14 ok (\`) +test 15 ok (\`) +test 16 ok (\') +test 17 ok (\') diff --git a/contrib/awk/test/gsubasgn.awk b/contrib/awk/test/gsubasgn.awk new file mode 100644 index 0000000..f0b7701 --- /dev/null +++ b/contrib/awk/test/gsubasgn.awk @@ -0,0 +1,13 @@ +# tests for assigning to a function within that function + +#1 - should be bad +function test1 (r) { gsub(r, "x", test1) } +BEGIN { test1("") } + +#2 - should be bad +function test2 () { gsub(/a/, "x", test2) } +BEGIN { test2() } + +#3 - should be ok +function test3 (r) { gsub(/a/, "x", r) } +BEGIN { test3("") } diff --git a/contrib/awk/test/gsubasgn.ok b/contrib/awk/test/gsubasgn.ok new file mode 100644 index 0000000..dfa6fbc --- /dev/null +++ b/contrib/awk/test/gsubasgn.ok @@ -0,0 +1,4 @@ +gawk: gsubasgn.awk:4: function test1 (r) { gsub(r, "x", test1) } +gawk: gsubasgn.awk:4: ^ gsub third parameter is not a changeable object +gawk: gsubasgn.awk:8: function test2 () { gsub(/a/, "x", test2) } +gawk: gsubasgn.awk:8: ^ gsub third parameter is not a changeable object diff --git a/contrib/awk/test/gsubtest.awk b/contrib/awk/test/gsubtest.awk new file mode 100755 index 0000000..3137479 --- /dev/null +++ b/contrib/awk/test/gsubtest.awk @@ -0,0 +1,8 @@ +BEGIN { + str = "abc"; gsub("b+", "FOO", str); print str + str = "abc"; gsub("x*", "X", str); print str + str = 
"abc"; gsub("b*", "X", str); print str + str = "abc"; gsub("c", "X", str); print str + str = "abc"; gsub("c+", "X", str); print str + str = "abc"; gsub("x*$", "X", str); print str +} diff --git a/contrib/awk/test/gsubtest.ok b/contrib/awk/test/gsubtest.ok new file mode 100644 index 0000000..191bebd --- /dev/null +++ b/contrib/awk/test/gsubtest.ok @@ -0,0 +1,6 @@ +aFOOc +XaXbXcX +XaXcX +abX +abX +abcX diff --git a/contrib/awk/test/igncfs.awk b/contrib/awk/test/igncfs.awk new file mode 100644 index 0000000..ebb58b2 --- /dev/null +++ b/contrib/awk/test/igncfs.awk @@ -0,0 +1,8 @@ +BEGIN { + IGNORECASE=1 + FS="[^a-z]+" +} +{ + for (i=1; i 10) { + ++num_long_words + print x + } + print num_long_words, "long words" +} diff --git a/contrib/awk/test/longwrds.ok b/contrib/awk/test/longwrds.ok new file mode 100644 index 0000000..01faa84 --- /dev/null +++ b/contrib/awk/test/longwrds.ok @@ -0,0 +1,21 @@ +20 long words +compatibility +concatenated +consistency +definitions +description +distributing +fistatements +gawk-options +gnu-specific +identically +implementation +implementations +information +non-portable +pattern-action +pre-defined +program-file +program-text +programming +restrictions diff --git a/contrib/awk/test/manpage b/contrib/awk/test/manpage new file mode 100644 index 0000000..09c3948 --- /dev/null +++ b/contrib/awk/test/manpage @@ -0,0 +1,200 @@ +.ds PX \s-1POSIX\s+1 +.ds UX \s-1UNIX\s+1 +.ds AN \s-1ANSI\s+1 +.TH GAWK 1 "May 28 1991" "Free Software Foundation" "Utility Commands" +.SH NAME +gawk \- pattern scanning and processing language +.SH SYNOPSIS +.B gawk +[ +.B \-W +.I gawk-options +] [ +.BI \-F\^ fs +] [ +.B \-v +.IR var = val +] +.B \-f +.I program-file +[ +.B \-\^\- +] file .\^.\^. +.br +.B gawk +[ +.B \-W +.I gawk-options +] [ +.BI \-F\^ fs +] [ +.B \-v +.IR var = val +] [ +.B \-\^\- +] +.I program-text +file .\^.\^. +.SH DESCRIPTION +.I Gawk +is the GNU Project's implementation of the AWK programming language. 
+It conforms to the definition of the language in +the \*(PX 1003.2 Command Language And Utilities Standard +(draft 11). +This version in turn is based on the description in +.IR "The AWK Programming Language" , +by Aho, Kernighan, and Weinberger, +with the additional features defined in the System V Release 4 version +of \*(UX +.IR awk . +.I Gawk +also provides some GNU-specific extensions. +.PP +The command line consists of options to +.I gawk +itself, the AWK program text (if not supplied via the +.B \-f +option), and values to be made +available in the +.B ARGC +and +.B ARGV +pre-defined AWK variables. +.SH OPTIONS +.PP +.I Gawk +accepts the following options, which should be available on any implementation +of the AWK language. +.TP +.BI \-F fs +Use +.I fs +for the input field separator (the value of the +.B FS +predefined +variable). +.TP +\fB\-v\fI var\fR\^=\^\fIval\fR +Assign the value +.IR val , +to the variable +.IR var , +before execution of the program begins. +Such variable values are available to the +.B BEGIN +block of an AWK program. +.TP +.BI \-f " program-file" +Read the AWK program source from the file +.IR program-file , +instead of from the first command line argument. +Multiple +.B \-f +options may be used. +.TP +.B \-\^\- +Signal the end of options. This is useful to allow further arguments to the +AWK program itself to start with a ``\-''. +This is mainly for consistency with the argument parsing convention used +by most other \*(PX programs. +.PP +Following the \*(PX standard, +.IR gawk -specific +options are supplied via arguments to the +.B \-W +option. Multiple +.B \-W +options may be supplied, or multiple arguments may be supplied together +if they are separated by commas, or enclosed in quotes and separated +by white space. +Case is ignored in arguments to the +.B \-W +option. +.PP +The +.B \-W +option accepts the following arguments: +.TP \w'\fBcopyright\fR'u+1n +.B compat +Run in +.I compatibility +mode. 
In compatibility mode, +.I gawk +behaves identically to \*(UX +.IR awk ; +none of the GNU-specific extensions are recognized. +.TP +.PD 0 +.B copyleft +.TP +.PD +.B copyright +Print the short version of the GNU copyright information message on +the error output. +.TP +.B lint +Provide warnings about constructs that are +dubious or non-portable to other AWK implementations. +.TP +.B posix +This turns on +.I compatibility +mode, with the following additional restrictions: +.RS +.TP \w'\(bu'u+1n +\(bu +.B \ex +escape sequences are not recognized. +.TP +\(bu +The synonym +.B func +for the keyword +.B function +is not recognized. +.TP +\(bu +The operators +.B ** +and +.B **= +cannot be used in place of +.B ^ +and +.BR ^= . +.RE +.TP +.B version +Print version information for this particular copy of +.I gawk +on the error output. +This is useful mainly for knowing if the current copy of +.I gawk +on your system +is up to date with respect to whatever the Free Software Foundation +is distributing. +.PP +Any other options are flagged as illegal, but are otherwise ignored. +.SH AWK PROGRAM EXECUTION +.PP +An AWK program consists of a sequence of pattern-action statements +and optional function definitions. +.RS +.PP +\fIpattern\fB { \fIaction statements\fB }\fR +.br +\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR +.RE +.PP +.I Gawk +first reads the program source from the +.IR program-file (s) +if specified, or from the first non-option argument on the command line. +The +.B \-f +option may be used multiple times on the command line. +.I Gawk +will read the program text as if all the +.IR program-file s +had been concatenated together. 
This is useful for building libraries +of AWK functions, without having to include them in each new AWK diff --git a/contrib/awk/test/manyfiles.awk b/contrib/awk/test/manyfiles.awk new file mode 100644 index 0000000..8651a3a --- /dev/null +++ b/contrib/awk/test/manyfiles.awk @@ -0,0 +1 @@ +{ print $2 > ("junk/" $1) } diff --git a/contrib/awk/test/math.awk b/contrib/awk/test/math.awk new file mode 100644 index 0000000..90a01dd --- /dev/null +++ b/contrib/awk/test/math.awk @@ -0,0 +1,10 @@ +BEGIN { + pi = 3.1415927 + printf "cos(%f) = %f\n", pi/4, cos(pi/4) + printf "sin(%f) = %f\n", pi/4, sin(pi/4) + e = exp(1) + printf "e = %f\n", e + printf "log(e) = %f\n", log(e) + printf "sqrt(pi ^ 2) = %f\n", sqrt(pi ^ 2) + printf "atan2(1, 1) = %f\n", atan2(1, 1) +} diff --git a/contrib/awk/test/math.ok b/contrib/awk/test/math.ok new file mode 100644 index 0000000..a396a5b --- /dev/null +++ b/contrib/awk/test/math.ok @@ -0,0 +1,6 @@ +cos(0.785398) = 0.707107 +sin(0.785398) = 0.707107 +e = 2.718282 +log(e) = 1.000000 +sqrt(pi ^ 2) = 3.141593 +atan2(1, 1) = 0.785398 diff --git a/contrib/awk/test/messages.awk b/contrib/awk/test/messages.awk new file mode 100644 index 0000000..555f6e3 --- /dev/null +++ b/contrib/awk/test/messages.awk @@ -0,0 +1,9 @@ +# This is a demo of different ways of printing with gawk. Try it +# with and without -c (compatibility) flag, redirecting output +# from gawk to a file or not. Some results can be quite unexpected. +BEGIN { + print "Goes to a file out1" > "out1" + print "Normal print statement" + print "This printed on stdout" > "/dev/stdout" + print "You blew it!" 
> "/dev/stderr" +} diff --git a/contrib/awk/test/mmap8k.in b/contrib/awk/test/mmap8k.in new file mode 100644 index 0000000..0500ddf --- /dev/null +++ b/contrib/awk/test/mmap8k.in @@ -0,0 +1,143 @@ +XXXXXXXX.com ALTERNET 9305 930528 1500.00 startup +XXXXXXXX.com ALTERNET 9305 930624 94.38 Line-9305 +XXXXXXXX.com ALTERNET 9306 930624 104.49 Line-9306 +XXXXXXXX.com ALTERNET 9306 930624 649.16 Line-install +XXXXXXXX.com ALTERNET 9306 930624 166.67 TCP-slip +XXXXXXXX.com ALTERNET 9307 930624 104.49 Line-9307 +XXXXXXXX.com ALTERNET 9307 930624 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9308 930701 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9308 930701 104.49 line-9308 +XXXXXXXX.com PAYMENT 9307 930731 1500.00 1870 +XXXXXXXX.com ALTERNET 9309 930801 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9309 930801 104.49 line-9309 +XXXXXXXX.com INTEREST 9307 930801 22.50 +XXXXXXXX.com CREDADJ 9308 930805 22.50 waive interest +XXXXXXXX.com PAYMENT 9308 930820 1723.68 1982 +XXXXXXXX.com ALTERNET 9310 930901 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9310 930901 104.49 line-9310 +XXXXXXXX.com PAYMENT 9310 931001 708.98 2313 +XXXXXXXX.com ALTERNET 9311 931001 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9311 931001 104.49 line-9311 +XXXXXXXX.com INTEREST 9309 931001 5.32 +XXXXXXXX.com CREDADJ 9310 931007 5.32 waive int-9309 +XXXXXXXX.com ALTERNET 9312 931101 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9312 931101 104.49 line-9312 +XXXXXXXX.com PAYMENT 9311 931120 354.49 002701 +XXXXXXXX.com ALTERNET 9401 931201 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9401 931201 104.49 line-9401 +XXXXXXXX.com PAYMENT 9312 931218 354.49 2884 +XXXXXXXX.com ALTERNET 9402 940101 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9402 940101 104.49 line-9402 +XXXXXXXX.com INTEREST 9312 940101 5.32 +XXXXXXXX.com PAYMENT 9401 940122 354.49 3084 +XXXXXXXX.com ALTERNET 9403 940201 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9403 940201 104.49 line-9403 +XXXXXXXX.com INTEREST 9401 940201 5.40 +XXXXXXXX.com PAYMENT 9402 940207 354.49 3140 +XXXXXXXX.com 
CREDADJ 9402 940211 5.32 interest-9402 +XXXXXXXX.com CREDADJ 9402 940211 5.40 interest-9403 +XXXXXXXX.com ALTERNET 9404 940301 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9404 940301 104.49 line-9404 +XXXXXXXX.com INTEREST 9402 940301 5.32 +XXXXXXXX.com PAYMENT 9403 940310 354.49 003307 +XXXXXXXX.com PAYMENT 9403 940324 354.49 3446 +XXXXXXXX.com ALTERNET 9405 940401 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9405 940401 104.49 line-9405 +XXXXXXXX.com ALTERNET 9406 940501 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9406 940501 104.49 line-9406 +XXXXXXXX.com INTEREST 9404 940501 5.40 +XXXXXXXX.com PAYMENT 9405 940509 359.81 003819 +XXXXXXXX.com ALTERNET 9407 940601 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9407 940601 104.49 line-9407 +XXXXXXXX.com INTEREST 9405 940601 5.40 +XXXXXXXX.com PAYMENT 9406 940603 354.49 004025 +XXXXXXXX.com ALTERNET 9408 940701 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9408 940701 104.49 line-9408 +XXXXXXXX.com INTEREST 9406 940701 5.48 +XXXXXXXX.com PAYMENT 9407 940725 354.49 004350 +XXXXXXXX.com ALTERNET 9409 940801 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9409 940801 104.49 line-9409 +XXXXXXXX.com INTEREST 9407 940801 5.56 +XXXXXXXX.com PAYMENT 9408 940808 354.49 004454 +XXXXXXXX.com ALTERNET 9409 940811 0.00 startup +XXXXXXXX.com EQUIPMENT 9408 940831 399.00 ATL6402-1 +XXXXXXXX.com EQUIPMENT 9408 940831 2295.00 NBClassicPac-1 +XXXXXXXX.com EQUIPMENT 9408 940831 1060.00 Syn35-1+ship +XXXXXXXX.com ALTERNET 9410 940901 250.00 TCP-slip +XXXXXXXX.com ALTERNET 9410 940901 104.49 line-9410 +XXXXXXXX.com INTEREST 9408 940901 5.64 +XXXXXXXX.com PAYMENT 9409 940906 354.49 004677 +XXXXXXXX.com CREDADJ 9409 940921 124.95 TCP-slip-9409 +XXXXXXXX.com CREDADJ 9409 940921 52.20 line-9409 +XXXXXXXX.com CREDADJ 9410 940921 250.00 TCP-slip-9410 +XXXXXXXX.com CREDADJ 9410 940921 104.49 line-9410 +XXXXXXXX.com ALTERNET 9409 940921 397.50 TCP-56k-local recon +XXXXXXXX.com ALTERNET 9409 940921 87.45 line-9409 recon +XXXXXXXX.com ALTERNET 9410 940921 795.00 TCP-56k-local recon 
+XXXXXXXX.com ALTERNET 9410 940921 174.90 line-9410 recon +XXXXXXXX.com ALTERNET 9411 941001 795.00 TCP-56k-local +XXXXXXXX.com ALTERNET 9411 941001 174.90 line-9411 +XXXXXXXX.com INTEREST 9409 941001 54.06 +XXXXXXXX.com PAYMENT 9410 941017 354.49 5026 +XXXXXXXX.com ALTERNET 9412 941101 795.00 TCP-56k-local +XXXXXXXX.com ALTERNET 9412 941101 174.90 line-9412 +XXXXXXXX.com INTEREST 9410 941101 85.93 +XXXXXXXX.com PAYMENT 9411 941114 969.90 005274 +XXXXXXXX.com ALTERNET 9501 941201 795.00 TCP-56k-local +XXXXXXXX.com ALTERNET 9501 941201 174.90 line-9501 +XXXXXXXX.com INTEREST 9411 941201 87.22 +XXXXXXXX.com PAYMENT 9412 941219 4723.90 5590 +XXXXXXXX.com ALTERNET 9502 950101 795.00 TCP-56k-local +XXXXXXXX.com ALTERNET 9502 950101 174.90 line-9502 +XXXXXXXX.com INTEREST 9412 950101 32.22 +XXXXXXXX.com PAYMENT 9501 950103 1893.11 5766 +XXXXXXXX.com ALTERNET 9503 950201 795.00 TCP-56k-local-old +XXXXXXXX.com ALTERNET 9503 950201 174.90 line-9503 +XXXXXXXX.com INTEREST 9501 950201 18.85 +XXXXXXXX.com PAYMENT 9502 950207 969.90 6044 +XXXXXXXX.com ALTERNET 9504 950301 795.00 TCP-56k-local-old +XXXXXXXX.com ALTERNET 9504 950301 174.90 line-9504 +XXXXXXXX.com INTEREST 9502 950301 19.13 +XXXXXXXX.com PAYMENT 9503 950307 969.90 6408 +XXXXXXXX.com ALTERNET 9504 950316 3000.00 startup TCP-bt1-128k%5 +XXXXXXXX.com PAYMENT 9503 950327 969.90 6594 +XXXXXXXX.com ALTERNET 9505 950401 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9505 950401 556.60 line-9505 +XXXXXXXX.com EQUIPMENT 9504 950410 1595.00 cisco2501-1 +XXXXXXXX.com CREDADJ 9504 950417 503.50 TCP-56k-local +XXXXXXXX.com CREDADJ 9504 950417 116.60 line-9504 +XXXXXXXX.com ALTERNET 9504 950417 448.80 line-install +XXXXXXXX.com ALTERNET 9504 950417 752.02 TCP-bt1-128k%5 recon +XXXXXXXX.com ALTERNET 9504 950417 371.00 line-9504 recon +XXXXXXXX.com PAYMENT 9504 950424 3000.00 6841 +XXXXXXXX.com ALTERNET 9506 950501 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9506 950501 556.60 line-9506 +XXXXXXXX.com PAYMENT 9505 950505 
2049.86 6985 +XXXXXXXX.com PAYMENT 9505 950531 3924.22 7179 +XXXXXXXX.com ALTERNET 9507 950601 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9507 950601 556.60 line-9507 +XXXXXXXX.com PAYMENT 9506 950607 1744.10 7232 +XXXXXXXX.com ALTERNET 9508 950701 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9508 950701 556.60 line-9508 +XXXXXXXX.com PAYMENT 9507 950705 1744.10 7641 +XXXXXXXX.com ALTERNET 9509 950801 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9509 950801 556.60 line-9509 +XXXXXXXX.com PAYMENT 9508 950803 1744.10 7914 +XXXXXXXX.com ALTERNET 9510 950901 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9510 950901 556.60 line-9510 +XXXXXXXX.com PAYMENT 9509 950905 1744.10 8203 +XXXXXXXX.com ALTERNET 9511 951001 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9511 951001 556.60 line-9511 +XXXXXXXX.com PAYMENT 9510 951003 1744.10 8508 +XXXXXXXX.com ALTERNET 9512 951101 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9512 951101 556.60 line-9512 +XXXXXXXX.com PAYMENT 9511 951103 2129.83 8837 +XXXXXXXX.com ALTERNET 9601 951201 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9601 951201 556.60 line-9601 +XXXXXXXX.com PAYMENT 9512 951204 2129.83 9131 +XXXXXXXX.com ALTERNET 9602 960101 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9602 960101 556.60 line-9602 +XXXXXXXX.com PAYMENT 9601 960103 1744.10 9456 +XXXXXXXX.com ALTERNET 9603 960201 1187.50 TCP-bt1-128k%5.00 +XXXXXXXX.com ALTERNET 9603 960201 556.60 line-9603 +XXXXXXXX.com PAYMENT 9602 960205 1358.37 9834 diff --git a/contrib/awk/test/negexp.ok b/contrib/awk/test/negexp.ok new file mode 100644 index 0000000..6e6566c --- /dev/null +++ b/contrib/awk/test/negexp.ok @@ -0,0 +1 @@ +0.01 diff --git a/contrib/awk/test/nfldstr.ok b/contrib/awk/test/nfldstr.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/nfset.awk b/contrib/awk/test/nfset.awk new file mode 100644 index 0000000..09ebd08 --- /dev/null +++ b/contrib/awk/test/nfset.awk @@ -0,0 +1 @@ +{ NF = 5 ; print } diff --git 
a/contrib/awk/test/nfset.in b/contrib/awk/test/nfset.in new file mode 100644 index 0000000..43329b5 --- /dev/null +++ b/contrib/awk/test/nfset.in @@ -0,0 +1,5 @@ +1 2 +1 2 3 4 +1 2 3 4 5 +1 2 3 4 5 6 7 8 +1 diff --git a/contrib/awk/test/nfset.ok b/contrib/awk/test/nfset.ok new file mode 100644 index 0000000..3ba48ae --- /dev/null +++ b/contrib/awk/test/nfset.ok @@ -0,0 +1,5 @@ +1 2 +1 2 3 4 +1 2 3 4 5 +1 2 3 4 5 +1 diff --git a/contrib/awk/test/nlfldsep.awk b/contrib/awk/test/nlfldsep.awk new file mode 100644 index 0000000..4fac81d --- /dev/null +++ b/contrib/awk/test/nlfldsep.awk @@ -0,0 +1,2 @@ +BEGIN { RS = "A" } +{print NF; for (i = 1; i <= NF; i++) print $i ; print ""} diff --git a/contrib/awk/test/nlfldsep.in b/contrib/awk/test/nlfldsep.in new file mode 100644 index 0000000..7b2317f --- /dev/null +++ b/contrib/awk/test/nlfldsep.in @@ -0,0 +1,5 @@ +some stuff +more stuffA +junk +stuffA +final diff --git a/contrib/awk/test/nlfldsep.ok b/contrib/awk/test/nlfldsep.ok new file mode 100644 index 0000000..6684916 --- /dev/null +++ b/contrib/awk/test/nlfldsep.ok @@ -0,0 +1,13 @@ +4 +some +stuff +more +stuff + +2 +junk +stuff + +1 +final + diff --git a/contrib/awk/test/noeffect.awk b/contrib/awk/test/noeffect.awk new file mode 100644 index 0000000..b375a4c --- /dev/null +++ b/contrib/awk/test/noeffect.awk @@ -0,0 +1,4 @@ +BEGIN { + s == "hello, world"; + print s +} diff --git a/contrib/awk/test/noeffect.ok b/contrib/awk/test/noeffect.ok new file mode 100644 index 0000000..b820ddf --- /dev/null +++ b/contrib/awk/test/noeffect.ok @@ -0,0 +1,2 @@ +gawk: noeffect.awk:3: warning: statement may have no effect + diff --git a/contrib/awk/test/nofmtch.awk b/contrib/awk/test/nofmtch.awk new file mode 100644 index 0000000..2ea2249 --- /dev/null +++ b/contrib/awk/test/nofmtch.awk @@ -0,0 +1 @@ +BEGIN { printf "%3\n" } diff --git a/contrib/awk/test/nofmtch.ok b/contrib/awk/test/nofmtch.ok new file mode 100644 index 0000000..e6f3846 --- /dev/null +++ b/contrib/awk/test/nofmtch.ok 
@@ -0,0 +1,2 @@ +gawk: nofmtch.awk:1: warning: printf format specifier does not have control letter +%3 diff --git a/contrib/awk/test/nondec.awk b/contrib/awk/test/nondec.awk new file mode 100644 index 0000000..a680110 --- /dev/null +++ b/contrib/awk/test/nondec.awk @@ -0,0 +1 @@ +BEGIN { print 0x81c3e8, 0x744018 } diff --git a/contrib/awk/test/nondec.ok b/contrib/awk/test/nondec.ok new file mode 100644 index 0000000..560f3d1 --- /dev/null +++ b/contrib/awk/test/nondec.ok @@ -0,0 +1 @@ +8504296 7618584 diff --git a/contrib/awk/test/nonl.awk b/contrib/awk/test/nonl.awk new file mode 100644 index 0000000..c227083 --- /dev/null +++ b/contrib/awk/test/nonl.awk @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/contrib/awk/test/nonl.ok b/contrib/awk/test/nonl.ok new file mode 100644 index 0000000..24bd9b7 --- /dev/null +++ b/contrib/awk/test/nonl.ok @@ -0,0 +1 @@ +gawk: nonl.awk:1: warning: source file does not end in newline diff --git a/contrib/awk/test/noparms.awk b/contrib/awk/test/noparms.awk new file mode 100644 index 0000000..2c7ccc7 --- /dev/null +++ b/contrib/awk/test/noparms.awk @@ -0,0 +1 @@ +function x(a, b, c , ,) {} diff --git a/contrib/awk/test/noparms.ok b/contrib/awk/test/noparms.ok new file mode 100644 index 0000000..4c934c3 --- /dev/null +++ b/contrib/awk/test/noparms.ok @@ -0,0 +1,4 @@ +gawk: noparms.awk:1: function x(a, b, c , ,) {} +gawk: noparms.awk:1: ^ parse error +gawk: noparms.awk:1: function x(a, b, c , ,) {} +gawk: noparms.awk:1: ^ parse error diff --git a/contrib/awk/test/nors.in b/contrib/awk/test/nors.in new file mode 100644 index 0000000..f90d9ec --- /dev/null +++ b/contrib/awk/test/nors.in @@ -0,0 +1 @@ +A B C D E \ No newline at end of file diff --git a/contrib/awk/test/nors.ok b/contrib/awk/test/nors.ok new file mode 100644 index 0000000..54d5aab --- /dev/null +++ b/contrib/awk/test/nors.ok @@ -0,0 +1,2 @@ +E +E diff --git a/contrib/awk/test/numsubstr.awk b/contrib/awk/test/numsubstr.awk new file mode 100644 index 
0000000..7a30993 --- /dev/null +++ b/contrib/awk/test/numsubstr.awk @@ -0,0 +1 @@ +{ print substr(1000+$1, 2) } diff --git a/contrib/awk/test/numsubstr.in b/contrib/awk/test/numsubstr.in new file mode 100644 index 0000000..ac65c36 --- /dev/null +++ b/contrib/awk/test/numsubstr.in @@ -0,0 +1,3 @@ +5000 +10000 +5000 diff --git a/contrib/awk/test/numsubstr.ok b/contrib/awk/test/numsubstr.ok new file mode 100644 index 0000000..86ec13c --- /dev/null +++ b/contrib/awk/test/numsubstr.ok @@ -0,0 +1,3 @@ +000 +1000 +000 diff --git a/contrib/awk/test/out1.ok b/contrib/awk/test/out1.ok new file mode 100644 index 0000000..f54b2b4 --- /dev/null +++ b/contrib/awk/test/out1.ok @@ -0,0 +1 @@ +Goes to a file out1 diff --git a/contrib/awk/test/out2.ok b/contrib/awk/test/out2.ok new file mode 100644 index 0000000..66b7d2f --- /dev/null +++ b/contrib/awk/test/out2.ok @@ -0,0 +1,2 @@ +Normal print statement +This printed on stdout diff --git a/contrib/awk/test/out3.ok b/contrib/awk/test/out3.ok new file mode 100644 index 0000000..7eb822f --- /dev/null +++ b/contrib/awk/test/out3.ok @@ -0,0 +1 @@ +You blew it! 
diff --git a/contrib/awk/test/paramdup.awk b/contrib/awk/test/paramdup.awk new file mode 100644 index 0000000..1f1cc7a --- /dev/null +++ b/contrib/awk/test/paramdup.awk @@ -0,0 +1,8 @@ +BEGIN { foo(0, 1, 2) } + +function foo(a, b, c, b, a) +{ + print "a =", a + print "b =", b + print "c =", c +} diff --git a/contrib/awk/test/paramdup.ok b/contrib/awk/test/paramdup.ok new file mode 100644 index 0000000..0308cc8 --- /dev/null +++ b/contrib/awk/test/paramdup.ok @@ -0,0 +1,2 @@ +gawk: paramdup.awk:4: error: function `foo': parameter #4, `b', duplicates parameter #2 +gawk: paramdup.awk:4: error: function `foo': parameter #5, `a', duplicates parameter #1 diff --git a/contrib/awk/test/pcntplus.awk b/contrib/awk/test/pcntplus.awk new file mode 100644 index 0000000..13999ac --- /dev/null +++ b/contrib/awk/test/pcntplus.awk @@ -0,0 +1 @@ +BEGIN { printf "%+d %d\n", 3, 4 } diff --git a/contrib/awk/test/pcntplus.ok b/contrib/awk/test/pcntplus.ok new file mode 100644 index 0000000..b790269 --- /dev/null +++ b/contrib/awk/test/pcntplus.ok @@ -0,0 +1 @@ ++3 4 diff --git a/contrib/awk/test/pid.awk b/contrib/awk/test/pid.awk new file mode 100644 index 0000000..9b47d90 --- /dev/null +++ b/contrib/awk/test/pid.awk @@ -0,0 +1,44 @@ +# From: John C. Oppenheimer +# Subject: gawk-3.0.2 pid test +# To: arnold@skeeve.atl.ga.us +# Date: Mon, 10 Feb 1997 08:31:55 -0600 (CST) +# +# Thanks for the very quick reply. +# +# This all started when I was looking for how to do the equivalent of +# "nextfile." I was after documentation and found our gawk down a few +# revs. +# +# Looks like the nextfile functionality was added somewhere around +# 2.15.5. There wasn't a way to do it, until now! Thanks for the +# functionality! +# +# Saw the /dev/xxx capability and just tried it. +# +# Anyway, I wrote a pid test. I hope that it is portable. Wanted to +# make a user test, but looks like id(1) is not very portable. But a +# little test is better than none. 
+# +# John +# +# pid.ok is a zero length file +# +# ================== pid.awk ============ +BEGIN { + getline pid <"/dev/pid" + getline ppid <"/dev/ppid" +} +NR == 1 { + if (pid != $0) { + printf "Bad pid %d, wanted %d\n", $0, pid + } +} +NR == 2 { + if (ppid != $0) { + printf "Bad ppid %d, wanted %d\n", $0, ppid + } +} +END { # ADR --- added + close("/dev/pid") + close("/dev/ppid") +} diff --git a/contrib/awk/test/pid.ok b/contrib/awk/test/pid.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/pid.sh b/contrib/awk/test/pid.sh new file mode 100755 index 0000000..a19d72c --- /dev/null +++ b/contrib/awk/test/pid.sh @@ -0,0 +1,5 @@ +#! /bin/sh +AWK=${AWK-../gawk} +echo $$ > _pid.in +echo $1 >> _pid.in +exec $AWK -f pid.awk _pid.in diff --git a/contrib/awk/test/pipeio1.awk b/contrib/awk/test/pipeio1.awk new file mode 100644 index 0000000..66f50ad --- /dev/null +++ b/contrib/awk/test/pipeio1.awk @@ -0,0 +1,31 @@ +# From dragon!gamgee.acad.emich.edu!dhw Tue Mar 18 01:12:15 1997 +# Return-Path: +# Message-ID: +# Date: Mon, 17 Mar 97 20:48 CST +# From: dhw@gamgee.acad.emich.edu (David H. West) +# To: arnold@gnu.ai.mit.edu +# Subject: gawk 3.0.2 bug report (cc of msg to bug-gnu-utils) +# Status: OR +# Content-Length: 869 +# X-Lines: 20 +# X-Display-Position: 2 +# +# Nature of bug: operation on a pipe side-effects a different pipe. +# Observed-With: gawk 3.0.2, Linux kernel 2.0.28 +# Reproduce-By: running the following script, without and with the "close" +# statement uncommented. 
+# -----------------cut here-------------------------- +BEGIN {FILE1="test1"; FILE2="test2"; + print "1\n" > FILE1; close(FILE1); + print "2\n" > FILE2; close(FILE2); + cmd1="cat " FILE1; cmd2="cat " FILE2; + #end of preparing commands which give easily-predictable output + + while( (cmd1 | getline)==1) { #terminates as file has only 1 line + #and we never close cmd1 + cmd2 | getline L; + #BUG: uncommenting the following line causes an infinite loop + close(cmd2); + print $0,L; + } + } diff --git a/contrib/awk/test/pipeio1.ok b/contrib/awk/test/pipeio1.ok new file mode 100644 index 0000000..706b09e --- /dev/null +++ b/contrib/awk/test/pipeio1.ok @@ -0,0 +1,2 @@ +1 2 + 2 diff --git a/contrib/awk/test/pipeio2.awk b/contrib/awk/test/pipeio2.awk new file mode 100644 index 0000000..6f4f979 --- /dev/null +++ b/contrib/awk/test/pipeio2.awk @@ -0,0 +1,67 @@ +# From: megaadm@rina.quantum.de +# Subject: Bug report - closing down pipes which read from shell com +# To: bug-gnu-utils@prep.ai.mit.edu +# Date: Thu, 27 Feb 1997 23:19:16 +0100 (CET) +# CC: arnold@gnu.ai.mit.edu +# +# Hello people, +# +# i think i found a bug or something mysterious behaviour in +# gawk Version 3.0 patchlevel 0. +# +# I am running on linux 2.0.25 under bash. +# +# Could you please have a look at the following awk program +# an let me please know, if this is what i expect it to, +# namely a bug. +# +# ----------- cut here -------------------------------------------- +BEGIN { + # OS is linux 2.0.25 + # shell is bash + # Gnu Awk (gawk) 3.0, patchlevel 0 + # The command i typed on the shell was "gawk -f -" + + #com = "cal 01 1997" + com = ("cat " SRCDIR "/pipeio2.in") + + while ((com | getline fnam) > 0) { + + com_tr = "echo " fnam " | tr [0-9]. ..........." + print "\'" com_tr "\'" + + com_tr | getline nam + print nam + + # please run that program and take a look at the + # output. I think this is what was expected. + + # Then comment in the following 4 lines and see + # what happens. 
I expect the first pipe "com | getline" + # not to be close, but i think this is exactly what happens + # So, is this ok ? + + if (close(com_tr) < 0) { + print ERRNO + break + } + } + + close(com) + } +# ----------- cut here -------------------------------------------- +# +# There is another thing i do not understand. +# Why doesn't the awk - command "close" reports an +# error, if i would say close("abc") which i had never +# openend ? +# +# Regards, +# Ulrich Gvbel +# -- +# /********************************************************\ +# * Ulrich Gvbel, goebel@quantum.de * +# * Quantum Gesellschaft f|r Software mbH, Dortmund * +# * phone : +49-231-9749-201 fax: +49-231-9749-3 * +# * private: +49-231-803994 fax: +49-231-803994 * +# \********************************************************/ diff --git a/contrib/awk/test/pipeio2.in b/contrib/awk/test/pipeio2.in new file mode 100644 index 0000000..2652b0e --- /dev/null +++ b/contrib/awk/test/pipeio2.in @@ -0,0 +1,8 @@ + January 1997 + S M Tu W Th F S + 1 2 3 4 + 5 6 7 8 9 10 11 +12 13 14 15 16 17 18 +19 20 21 22 23 24 25 +26 27 28 29 30 31 + diff --git a/contrib/awk/test/pipeio2.ok b/contrib/awk/test/pipeio2.ok new file mode 100644 index 0000000..3f55c05 --- /dev/null +++ b/contrib/awk/test/pipeio2.ok @@ -0,0 +1,16 @@ +'echo January 1997 | tr [0-9]. ...........' +January .... +'echo S M Tu W Th F S | tr [0-9]. ...........' +S M Tu W Th F S +'echo 1 2 3 4 | tr [0-9]. ...........' +. . . . +'echo 5 6 7 8 9 10 11 | tr [0-9]. ...........' +. . . . . .. .. +'echo 12 13 14 15 16 17 18 | tr [0-9]. ...........' +.. .. .. .. .. .. .. +'echo 19 20 21 22 23 24 25 | tr [0-9]. ...........' +.. .. .. .. .. .. .. +'echo 26 27 28 29 30 31 | tr [0-9]. ...........' +.. .. .. .. .. .. +'echo | tr [0-9]. ...........' 
+ diff --git a/contrib/awk/test/posix.awk b/contrib/awk/test/posix.awk new file mode 100644 index 0000000..79474f3 --- /dev/null +++ b/contrib/awk/test/posix.awk @@ -0,0 +1,69 @@ +BEGIN { + a = "+2"; b = 2; c = "+2a"; d = "+2 "; e = " 2" + + printf "Test #1: " + if (b == a) print "\"" a "\"" " compares as a number" + else print "\"" a "\"" " compares as a string" + + printf "Test #2: " + if (b == c) print "\"" c "\"" " compares as a number" + else print "\"" c "\"" " compares as a string" + + printf "Test #3: " + if (b == d) print "\"" d "\"" " compares as a number" + else print "\"" d "\"" " compares as a string" + + printf "Test #4: " + if (b == e) print "\"" e "\"" " compares as a number" + else print "\"" e "\"" " compares as a string" + + f = a + b + c + d + e + print "after addition" + + printf "Test #5: " + if (b == a) print "\"" a "\"" " compares as a number" + else print "\"" a "\"" " compares as a string" + + printf "Test #6: " + if (b == c) print "\"" c "\"" " compares as a number" + else print "\"" c "\"" " compares as a string" + + printf "Test #7: " + if (b == d) print "\"" d "\"" " compares as a number" + else print "\"" d "\"" " compares as a string" + + printf "Test #8: " + if (b == e) print "\"" e "\"" " compares as a number" + else print "\"" e "\"" " compares as a string" + + printf "Test #9: " + if ("3e5" > "5") print "\"3e5\" > \"5\"" + else print "\"3e5\" <= \"5\"" + + printf "Test #10: " + x = 32.14 + y[x] = "test" + OFMT = "%e" + print y[x] + + printf "Test #11: " + x = x + 0 + print y[x] + + printf "Test #12: " + OFMT="%f" + CONVFMT="%e" + print 1.5, 1.5 "" + + printf "Test #13: " + if ( 1000000 "" == 1000001 "") print "match" + else print "nomatch" +} +{ + printf "Test #14: " + FS = ":" + print $1 + FS = "," + printf "Test #15: " + print $2 +} diff --git a/contrib/awk/test/posix.ok b/contrib/awk/test/posix.ok new file mode 100644 index 0000000..100b150 --- /dev/null +++ b/contrib/awk/test/posix.ok @@ -0,0 +1,16 @@ +Test #1: "+2" compares 
as a string +Test #2: "+2a" compares as a string +Test #3: "+2 " compares as a string +Test #4: " 2" compares as a string +after addition +Test #5: "+2" compares as a string +Test #6: "+2a" compares as a string +Test #7: "+2 " compares as a string +Test #8: " 2" compares as a string +Test #9: "3e5" <= "5" +Test #10: test +Test #11: test +Test #12: 1.500000 1.500000e+00 +Test #13: nomatch +Test #14: 1:2,3 +Test #15: 4 diff --git a/contrib/awk/test/poundbang b/contrib/awk/test/poundbang new file mode 100755 index 0000000..d60652e --- /dev/null +++ b/contrib/awk/test/poundbang @@ -0,0 +1,3 @@ +#! /tmp/gawk -f + { ccount += length($0) } +END { printf "average line length is %2.4f\n", ccount/NR} diff --git a/contrib/awk/test/poundbang.ok b/contrib/awk/test/poundbang.ok new file mode 100644 index 0000000..143e28d --- /dev/null +++ b/contrib/awk/test/poundbang.ok @@ -0,0 +1 @@ +average line length is 32.6667 diff --git a/contrib/awk/test/prdupval.awk b/contrib/awk/test/prdupval.awk new file mode 100644 index 0000000..32c67dc --- /dev/null +++ b/contrib/awk/test/prdupval.awk @@ -0,0 +1 @@ +{ print NF, $NF, "abc" $NF } diff --git a/contrib/awk/test/prdupval.in b/contrib/awk/test/prdupval.in new file mode 100644 index 0000000..5626abf --- /dev/null +++ b/contrib/awk/test/prdupval.in @@ -0,0 +1 @@ +one diff --git a/contrib/awk/test/prdupval.ok b/contrib/awk/test/prdupval.ok new file mode 100644 index 0000000..6253616 --- /dev/null +++ b/contrib/awk/test/prdupval.ok @@ -0,0 +1 @@ +1 one abcone diff --git a/contrib/awk/test/prmarscl.awk b/contrib/awk/test/prmarscl.awk new file mode 100644 index 0000000..3caf3d9 --- /dev/null +++ b/contrib/awk/test/prmarscl.awk @@ -0,0 +1,6 @@ +function test(a) +{ + print a[1] +} + +BEGIN { j = 4; test(j) } diff --git a/contrib/awk/test/prmarscl.ok b/contrib/awk/test/prmarscl.ok new file mode 100644 index 0000000..b42cee6 --- /dev/null +++ b/contrib/awk/test/prmarscl.ok @@ -0,0 +1 @@ +gawk: prmarscl.awk:4: fatal: attempt to use scalar parameter 
1 as an array diff --git a/contrib/awk/test/prmreuse.awk b/contrib/awk/test/prmreuse.awk new file mode 100644 index 0000000..37e06f5 --- /dev/null +++ b/contrib/awk/test/prmreuse.awk @@ -0,0 +1,14 @@ +# from Pat Rankin, rankin@eql.caltech.edu + +BEGIN { dummy(1); legit(); exit } + +function dummy(arg) +{ + return arg +} + +function legit( scratch) +{ + split("1 2 3", scratch) + return "" +} diff --git a/contrib/awk/test/prmreuse.ok b/contrib/awk/test/prmreuse.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/prt1eval.awk b/contrib/awk/test/prt1eval.awk new file mode 100644 index 0000000..4ecd368 --- /dev/null +++ b/contrib/awk/test/prt1eval.awk @@ -0,0 +1,6 @@ +function tst () { + sum += 1 + return sum +} + +BEGIN { OFMT = "%.0f" ; print tst() } diff --git a/contrib/awk/test/prt1eval.ok b/contrib/awk/test/prt1eval.ok new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/contrib/awk/test/prt1eval.ok @@ -0,0 +1 @@ +1 diff --git a/contrib/awk/test/prtoeval.awk b/contrib/awk/test/prtoeval.awk new file mode 100644 index 0000000..77880d8 --- /dev/null +++ b/contrib/awk/test/prtoeval.awk @@ -0,0 +1,4 @@ +function returns_a_str() { print "" ; return "'A STRING'" } +BEGIN { + print "partial line:", returns_a_str() +} diff --git a/contrib/awk/test/prtoeval.ok b/contrib/awk/test/prtoeval.ok new file mode 100644 index 0000000..13e122b --- /dev/null +++ b/contrib/awk/test/prtoeval.ok @@ -0,0 +1,2 @@ + +partial line: 'A STRING' diff --git a/contrib/awk/test/rand.awk b/contrib/awk/test/rand.awk new file mode 100644 index 0000000..6378f3d --- /dev/null +++ b/contrib/awk/test/rand.awk @@ -0,0 +1,6 @@ +BEGIN { + srand(2) + for (i = 0; i < 19; i++) + printf "%3d ", (1 + int(100 * rand())) + print "" +} diff --git a/contrib/awk/test/rand.ok b/contrib/awk/test/rand.ok new file mode 100644 index 0000000..b6d7554 --- /dev/null +++ b/contrib/awk/test/rand.ok @@ -0,0 +1 @@ + 27 17 86 27 22 53 61 11 33 48 51 97 99 35 20 27 62 100 32 diff --git 
a/contrib/awk/test/reg/exp-eq.awk b/contrib/awk/test/reg/exp-eq.awk new file mode 100644 index 0000000..fed6a69 --- /dev/null +++ b/contrib/awk/test/reg/exp-eq.awk @@ -0,0 +1 @@ +{ $0 ^= 3 ; print $1} diff --git a/contrib/awk/test/reg/exp-eq.good b/contrib/awk/test/reg/exp-eq.good new file mode 100644 index 0000000..d8d59aa --- /dev/null +++ b/contrib/awk/test/reg/exp-eq.good @@ -0,0 +1,3 @@ +1 +8 +27 diff --git a/contrib/awk/test/reg/exp-eq.in b/contrib/awk/test/reg/exp-eq.in new file mode 100644 index 0000000..01e79c3 --- /dev/null +++ b/contrib/awk/test/reg/exp-eq.in @@ -0,0 +1,3 @@ +1 +2 +3 diff --git a/contrib/awk/test/reg/exp.awk b/contrib/awk/test/reg/exp.awk new file mode 100644 index 0000000..4e707f8 --- /dev/null +++ b/contrib/awk/test/reg/exp.awk @@ -0,0 +1 @@ +BEGIN { print exp(0), exp(1000000), exp(0.5) } diff --git a/contrib/awk/test/reg/exp.good b/contrib/awk/test/reg/exp.good new file mode 100644 index 0000000..07b8853 --- /dev/null +++ b/contrib/awk/test/reg/exp.good @@ -0,0 +1,2 @@ +1 gawk: reg/exp.awk:1: warning: exp argument 1e+06 is out of range +Inf 1.64872 diff --git a/contrib/awk/test/reg/exp.in b/contrib/awk/test/reg/exp.in new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/reg/func.awk b/contrib/awk/test/reg/func.awk new file mode 100644 index 0000000..e32cd4e --- /dev/null +++ b/contrib/awk/test/reg/func.awk @@ -0,0 +1 @@ +BEGIN { print dummy(1) } diff --git a/contrib/awk/test/reg/func.good b/contrib/awk/test/reg/func.good new file mode 100644 index 0000000..d3c7c71 --- /dev/null +++ b/contrib/awk/test/reg/func.good @@ -0,0 +1 @@ +gawk: reg/func.awk:1: fatal: function `dummy' not defined diff --git a/contrib/awk/test/reg/func.in b/contrib/awk/test/reg/func.in new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/reg/func2.awk b/contrib/awk/test/reg/func2.awk new file mode 100644 index 0000000..2abf2c1 --- /dev/null +++ b/contrib/awk/test/reg/func2.awk @@ -0,0 +1,2 @@ +function dummy() { ; } 
+BEGIN { print dummy (1) } diff --git a/contrib/awk/test/reg/func2.good b/contrib/awk/test/reg/func2.good new file mode 100644 index 0000000..ae87bc3 --- /dev/null +++ b/contrib/awk/test/reg/func2.good @@ -0,0 +1,2 @@ +gawk: reg/func2.awk:2: fatal: function `dummy' called with space between name and (, +or used in other expression context diff --git a/contrib/awk/test/reg/func2.in b/contrib/awk/test/reg/func2.in new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/reg/log.awk b/contrib/awk/test/reg/log.awk new file mode 100644 index 0000000..bcae90b --- /dev/null +++ b/contrib/awk/test/reg/log.awk @@ -0,0 +1 @@ +BEGIN { print log(0), log(-1), log(100) } diff --git a/contrib/awk/test/reg/log.good b/contrib/awk/test/reg/log.good new file mode 100644 index 0000000..857ab77 --- /dev/null +++ b/contrib/awk/test/reg/log.good @@ -0,0 +1,4 @@ +log: SING error +-Inf gawk: reg/log.awk:1: warning: log called with negative argument -1 +log: DOMAIN error +NaN 4.60517 diff --git a/contrib/awk/test/reg/log.in b/contrib/awk/test/reg/log.in new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/regtest b/contrib/awk/test/regtest new file mode 100755 index 0000000..72b0dbf --- /dev/null +++ b/contrib/awk/test/regtest @@ -0,0 +1,18 @@ +#! 
/bin/sh + +case "$AWK" in +"") AWK=../gawk ;; +esac +#AWK=${AWK:-../gawk} + +for i in reg/*.awk +do + it=`basename $i .awk` + $AWK -f $i reg/$it.out 2>&1 + if cmp -s reg/$it.out reg/$it.good + then + rm -f reg/$it.out + else + echo "regtest: $it fails" + fi +done diff --git a/contrib/awk/test/reindops.awk b/contrib/awk/test/reindops.awk new file mode 100644 index 0000000..13ae657 --- /dev/null +++ b/contrib/awk/test/reindops.awk @@ -0,0 +1,6 @@ +{ + if ($1 !~ /^+[2-9]/) + print "gawk is broken" + else + print "gawk is ok" +} diff --git a/contrib/awk/test/reindops.in b/contrib/awk/test/reindops.in new file mode 100644 index 0000000..b1e5435 --- /dev/null +++ b/contrib/awk/test/reindops.in @@ -0,0 +1 @@ ++44 123 456 diff --git a/contrib/awk/test/reindops.ok b/contrib/awk/test/reindops.ok new file mode 100644 index 0000000..f9605fd --- /dev/null +++ b/contrib/awk/test/reindops.ok @@ -0,0 +1 @@ +gawk is ok diff --git a/contrib/awk/test/reint.awk b/contrib/awk/test/reint.awk new file mode 100644 index 0000000..add0f2a --- /dev/null +++ b/contrib/awk/test/reint.awk @@ -0,0 +1 @@ +{ print match($0, /a{3}/) } diff --git a/contrib/awk/test/reint.in b/contrib/awk/test/reint.in new file mode 100644 index 0000000..43caa2a --- /dev/null +++ b/contrib/awk/test/reint.in @@ -0,0 +1 @@ +match this: aaa diff --git a/contrib/awk/test/reint.ok b/contrib/awk/test/reint.ok new file mode 100644 index 0000000..b1bd38b --- /dev/null +++ b/contrib/awk/test/reint.ok @@ -0,0 +1 @@ +13 diff --git a/contrib/awk/test/reparse.awk b/contrib/awk/test/reparse.awk new file mode 100644 index 0000000..433ecbb --- /dev/null +++ b/contrib/awk/test/reparse.awk @@ -0,0 +1,7 @@ +{ + gsub(/x/, " ") + $0 = $0 + print $1 + print $0 + print $1, $2, $3 +} diff --git a/contrib/awk/test/reparse.in b/contrib/awk/test/reparse.in new file mode 100644 index 0000000..6f31cde --- /dev/null +++ b/contrib/awk/test/reparse.in @@ -0,0 +1 @@ +1 axbxc 2 diff --git a/contrib/awk/test/reparse.ok b/contrib/awk/test/reparse.ok 
new file mode 100644 index 0000000..6bdfacf --- /dev/null +++ b/contrib/awk/test/reparse.ok @@ -0,0 +1,3 @@ +1 +1 a b c 2 +1 a b diff --git a/contrib/awk/test/resplit.ok b/contrib/awk/test/resplit.ok new file mode 100644 index 0000000..6178079 --- /dev/null +++ b/contrib/awk/test/resplit.ok @@ -0,0 +1 @@ +b diff --git a/contrib/awk/test/rs.in b/contrib/awk/test/rs.in new file mode 100644 index 0000000..edef835 --- /dev/null +++ b/contrib/awk/test/rs.in @@ -0,0 +1,15 @@ + + +a +b + + +c d + + + +e + + + + diff --git a/contrib/awk/test/rs.ok b/contrib/awk/test/rs.ok new file mode 100644 index 0000000..9dd6bd3 --- /dev/null +++ b/contrib/awk/test/rs.ok @@ -0,0 +1,3 @@ +a b +c d +e diff --git a/contrib/awk/test/rswhite.awk b/contrib/awk/test/rswhite.awk new file mode 100644 index 0000000..0048765 --- /dev/null +++ b/contrib/awk/test/rswhite.awk @@ -0,0 +1,2 @@ +BEGIN { RS = "" } +{ printf("<%s>\n", $0) } diff --git a/contrib/awk/test/rswhite.in b/contrib/awk/test/rswhite.in new file mode 100644 index 0000000..39f7756 --- /dev/null +++ b/contrib/awk/test/rswhite.in @@ -0,0 +1,2 @@ + a b +c d diff --git a/contrib/awk/test/rswhite.ok b/contrib/awk/test/rswhite.ok new file mode 100644 index 0000000..a029e47 --- /dev/null +++ b/contrib/awk/test/rswhite.ok @@ -0,0 +1,2 @@ +< a b +c d> diff --git a/contrib/awk/test/sclforin.awk b/contrib/awk/test/sclforin.awk new file mode 100644 index 0000000..335e854 --- /dev/null +++ b/contrib/awk/test/sclforin.awk @@ -0,0 +1 @@ +BEGIN { j = 4; for (i in j) print j[i] } diff --git a/contrib/awk/test/sclforin.ok b/contrib/awk/test/sclforin.ok new file mode 100644 index 0000000..d87fa61 --- /dev/null +++ b/contrib/awk/test/sclforin.ok @@ -0,0 +1 @@ +gawk: sclforin.awk:1: fatal: attempt to use scalar as array diff --git a/contrib/awk/test/sclifin.awk b/contrib/awk/test/sclifin.awk new file mode 100644 index 0000000..64f5d0d --- /dev/null +++ b/contrib/awk/test/sclifin.awk @@ -0,0 +1,7 @@ +BEGIN { + j = 4 + if ("foo" in j) + print "ouch" + 
else + print "ok" +} diff --git a/contrib/awk/test/sclifin.ok b/contrib/awk/test/sclifin.ok new file mode 100644 index 0000000..717f836 --- /dev/null +++ b/contrib/awk/test/sclifin.ok @@ -0,0 +1 @@ +gawk: sclifin.awk:7: fatal: attempt to use scalar as array diff --git a/contrib/awk/test/splitargv.awk b/contrib/awk/test/splitargv.awk new file mode 100644 index 0000000..10886ef --- /dev/null +++ b/contrib/awk/test/splitargv.awk @@ -0,0 +1,7 @@ +BEGIN { + for (idx = 1; idx < ARGC; idx++) + split(ARGV[idx], temp, "."); + } + { + print $0; + } diff --git a/contrib/awk/test/splitargv.in b/contrib/awk/test/splitargv.in new file mode 100644 index 0000000..10886ef --- /dev/null +++ b/contrib/awk/test/splitargv.in @@ -0,0 +1,7 @@ +BEGIN { + for (idx = 1; idx < ARGC; idx++) + split(ARGV[idx], temp, "."); + } + { + print $0; + } diff --git a/contrib/awk/test/splitargv.ok b/contrib/awk/test/splitargv.ok new file mode 100644 index 0000000..10886ef --- /dev/null +++ b/contrib/awk/test/splitargv.ok @@ -0,0 +1,7 @@ +BEGIN { + for (idx = 1; idx < ARGC; idx++) + split(ARGV[idx], temp, "."); + } + { + print $0; + } diff --git a/contrib/awk/test/splitvar.awk b/contrib/awk/test/splitvar.awk new file mode 100644 index 0000000..9e1ac79 --- /dev/null +++ b/contrib/awk/test/splitvar.awk @@ -0,0 +1,5 @@ +{ + sep = "=+" + n = split($0, a, sep) + print n +} diff --git a/contrib/awk/test/splitvar.in b/contrib/awk/test/splitvar.in new file mode 100644 index 0000000..85be8ee --- /dev/null +++ b/contrib/awk/test/splitvar.in @@ -0,0 +1 @@ +Here===Is=Some=====Data diff --git a/contrib/awk/test/splitvar.ok b/contrib/awk/test/splitvar.ok new file mode 100644 index 0000000..b8626c4 --- /dev/null +++ b/contrib/awk/test/splitvar.ok @@ -0,0 +1 @@ +4 diff --git a/contrib/awk/test/splitwht.awk b/contrib/awk/test/splitwht.awk new file mode 100644 index 0000000..6163d72 --- /dev/null +++ b/contrib/awk/test/splitwht.awk @@ -0,0 +1,7 @@ +BEGIN { + str = "a b\t\tc d" + n = split(str, a, " ") + print n + m = 
split(str, b, / /) + print m +} diff --git a/contrib/awk/test/splitwht.ok b/contrib/awk/test/splitwht.ok new file mode 100644 index 0000000..61c83cb --- /dev/null +++ b/contrib/awk/test/splitwht.ok @@ -0,0 +1,2 @@ +4 +5 diff --git a/contrib/awk/test/sprintfc.awk b/contrib/awk/test/sprintfc.awk new file mode 100644 index 0000000..ee1e5a7 --- /dev/null +++ b/contrib/awk/test/sprintfc.awk @@ -0,0 +1 @@ +{ print sprintf("%c", $1), $1 } diff --git a/contrib/awk/test/sprintfc.in b/contrib/awk/test/sprintfc.in new file mode 100644 index 0000000..4602d28 --- /dev/null +++ b/contrib/awk/test/sprintfc.in @@ -0,0 +1,3 @@ +65 +66 +foo diff --git a/contrib/awk/test/sprintfc.ok b/contrib/awk/test/sprintfc.ok new file mode 100644 index 0000000..33769a8 --- /dev/null +++ b/contrib/awk/test/sprintfc.ok @@ -0,0 +1,3 @@ +A 65 +B 66 +f foo diff --git a/contrib/awk/test/strftlng.awk b/contrib/awk/test/strftlng.awk new file mode 100644 index 0000000..0ef8195 --- /dev/null +++ b/contrib/awk/test/strftlng.awk @@ -0,0 +1,11 @@ +# test file from Paul Eggert, eggert@twinsun.com +# modified for portability (%c doesn't cut it) + +BEGIN { + BUFSIZ = 1024 + simpleformat = format = "%m/%d/%y %H:%M:%S\n" + clen = length(strftime(format, 0)) + for (i = 1; i < BUFSIZ / clen + 1; i++) + format = format simpleformat + printf "%s", strftime(format, 0) +} diff --git a/contrib/awk/test/strftlng.ok b/contrib/awk/test/strftlng.ok new file mode 100644 index 0000000..3008aa2 --- /dev/null +++ b/contrib/awk/test/strftlng.ok @@ -0,0 +1,58 @@ +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 
+01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 +01/01/70 00:00:00 diff --git a/contrib/awk/test/substr.awk b/contrib/awk/test/substr.awk new file mode 100644 index 0000000..6016369 --- /dev/null +++ b/contrib/awk/test/substr.awk @@ -0,0 +1,14 @@ +BEGIN { + x = "A" + printf("%-39s\n", substr(x,1,39)) + print substr("abcdef", 0, 2) + print substr("abcdef", 2.3, 2) + print substr("abcdef", -1, 2) + print substr("abcdef", 1, 0) + print substr("abcdef", 1, -3) + print substr("abcdef", 1, 2.3) + print substr("", 1, 2) + print substr("abcdef", 5, 5) + print substr("abcdef", 7, 2) + exit (0) +} diff --git a/contrib/awk/test/substr.ok b/contrib/awk/test/substr.ok new file mode 100644 index 0000000..be6889d --- /dev/null +++ b/contrib/awk/test/substr.ok @@ -0,0 +1,10 @@ +A +ab +bc +ab + + +ab + +ef + diff --git a/contrib/awk/test/swaplns.awk b/contrib/awk/test/swaplns.awk new file mode 100644 index 0000000..6bf2240 --- /dev/null +++ b/contrib/awk/test/swaplns.awk @@ -0,0 +1,7 @@ +{ + if ((getline tmp) > 0) { + print tmp + print + } else + print +} diff --git a/contrib/awk/test/swaplns.in b/contrib/awk/test/swaplns.in new file mode 100644 index 0000000..71fb162 --- /dev/null +++ b/contrib/awk/test/swaplns.in @@ -0,0 +1,9 @@ +This directory contains some examples/test-cases for different +features of gawk - mostly not present in an old awk. Some are from +"The GAWK Manual", some are original, and some are mixture of the two. 
+Read header comments before attempting to use. Have fun and remember +that program which consists only of BEGIN block does not need an input +file. + + --mj + diff --git a/contrib/awk/test/swaplns.ok b/contrib/awk/test/swaplns.ok new file mode 100644 index 0000000..d38b7ca --- /dev/null +++ b/contrib/awk/test/swaplns.ok @@ -0,0 +1,9 @@ +features of gawk - mostly not present in an old awk. Some are from +This directory contains some examples/test-cases for different +Read header comments before attempting to use. Have fun and remember +"The GAWK Manual", some are original, and some are mixture of the two. +file. +that program which consists only of BEGIN block does not need an input + --mj + + diff --git a/contrib/awk/test/tradanch.awk b/contrib/awk/test/tradanch.awk new file mode 100644 index 0000000..0cd45d1 --- /dev/null +++ b/contrib/awk/test/tradanch.awk @@ -0,0 +1,2 @@ +/foo^bar/ +/foo$bar/ diff --git a/contrib/awk/test/tradanch.in b/contrib/awk/test/tradanch.in new file mode 100644 index 0000000..e5c8a09 --- /dev/null +++ b/contrib/awk/test/tradanch.in @@ -0,0 +1,2 @@ +foo^bar +foo$bar diff --git a/contrib/awk/test/tradanch.ok b/contrib/awk/test/tradanch.ok new file mode 100644 index 0000000..e69de29 diff --git a/contrib/awk/test/tweakfld.awk b/contrib/awk/test/tweakfld.awk new file mode 100644 index 0000000..e7b538f --- /dev/null +++ b/contrib/awk/test/tweakfld.awk @@ -0,0 +1,296 @@ +# To: bug-gnu-utils@prep.ai.mit.edu +# Cc: arnold@gnu.ai.mit.edu +# Date: Mon, 20 Nov 1995 11:39:29 -0500 +# From: "R. Hank Donnelly" +# +# Operating system: Linux1.2.13 (Slackware distrib) +# GAWK version: 2.15 (?) +# compiler: GCC (?) +# +# The following enclosed script does not want to fully process the input data +# file. It correctly executes the operations on the first record, and then dies +# on the second one. My true data file is much longer but this is +# representative and it does fail on a file even as short as this one. 
+# The failure appears to occur in the declared function add2output. Between the +# steps of incrementing NF by one and setting $NF to the passed variable +# the passed variable appears to vanish (i.e. NF does go from 68 to 69 +# and before incrementing it "variable" equals what it should but after +# "variable" has no value at all.) +# +# The scripts have been developed using nawk on a Sun (where they run fine) +# I have tried gawk there but get a different crash which I have not yet traced +# down. Ideally I would like to keep the script the same so that it would run +# on either gawk or nawk (that way I can step back and forth between laptop and +# workstation. +# +# Any ideas why the laptop installation is having problems? +# Hank +# +# +# #!/usr/bin/gawk -f + +BEGIN { + # set a few values + FS = "\t" + OFS = "\t" + pi = atan2(0, -1) +# distance from HRMA to focal plane in mm + fullradius = 10260.54 + + # set locations of parameters on input line + nf_nrg = 1 + nf_order = 3 + nf_item = 4 + nf_suite = 5 + nf_grating = 8 + nf_shutter = 9 + nf_type = 13 + nf_src = 14 + nf_target = 15 + nf_voltage = 16 + nf_flux = 17 + nf_filt1 = 20 + nf_filt1_th = 21 + nf_filt2 = 22 + nf_filt2_th = 23 + nf_bnd = 24 + nf_hrma_polar = 27 + nf_hrma_az = 28 + nf_detector = 30 + nf_acis_read = 32 + nf_acis_proc = 33 + nf_acis_frame = 34 + nf_hxda_aplist = 36 + nf_hxda_y_range = 37 + nf_hxda_z_range = 38 + nf_hxda_y_step = 39 + nf_hxda_z_step = 40 + nf_sim_z = 41 + nf_fam_polar = 43 + nf_fam_az = 44 + nf_fam_dither_type = 45 + nf_mono_init = 51 + nf_mono_range = 52 + nf_mono_step = 53 + nf_defocus = 54 + nf_acis_temp = 55 + nf_tight = 59 + nf_offset_y = 64 + nf_offset_z = 65 + + while( getline < "xrcf_mnemonics.dat" > 0 ) { + mnemonic[$1] = $2 + } + +# "date" | getline date_line +# ADR: use a fixed date so that testing will work + date_line = "Sun Mar 10 23:00:27 EST 1996" + split(date_line, in_date, " ") + out_date = in_date[2] " " in_date[3] ", " in_date[6] +} + +function add2output( 
variable ) { +#print("hi1") >> "debug" + NF++ +#print("hi2") >> "debug" + $NF = variable +#print("hi3") >> "debug" +} + +function error( ekey, message ) { + print "Error at input line " NR ", anode " ekey >> "errors.cleanup" + print " " message "." >> "errors.cleanup" +} + +function hxda_na() { + $nf_hxda_aplist = $nf_hxda_y_range = $nf_hxda_z_range = "N/A" + $nf_hxda_y_step = $nf_hxda_z_step = "N/A" +} + +function acis_na() { + $nf_acis_read = $nf_acis_proc = $nf_acis_frame = $nf_acis_temp = "N/A" +} + +function hrc_na() { +# print ("hi") >> "debug" +} + +function fpsi_na() { + acis_na() + hrc_na() + $nf_sim_z = $nf_fam_polar = $nf_fam_az = $nf_fam_dither_type = "N/A" +} + +function mono_na() { + $nf_mono_init = $nf_mono_range = $nf_mono_step = "N/A" +} + +# this gives the pitch and yaw of the HRMA and FAM +# positive pitch is facing the source "looking down" +# positive yaw is looking left +# 0 az is north 90 is up +# this also adds in the FAM X,Y,Z positions + +function polaz2yawpitch(polar, az) { + theta = az * pi / 180 + phi = polar * pi / 180 / 60 + + + if( polar == 0 ) { + add2output( 0 ) + add2output( 0 ) + } else { + if(az == 0 || az == 180) + add2output( 0 ) + else + add2output( - polar * sin(theta) ) + + +# x = cos (phi) +# y = sin (phi) * cos (theta) +# add2output( atan2(y,x)*180 / pi * 60 ) + + if(az == 90 || az ==270 ) + add2output( 0 ) + else + add2output( - polar * cos(theta) ) + + } +# x = cos (phi) +# z= sin (phi) * sin (theta) +# add2output( atan2(z,x)*180 / pi * 60 ) + + if(config !~ /HXDA/) { +# negative values of defocus move us farther from the source thus +# increasing radius + radius = fullradius - defocus + +# FAM_x; FAM_y; FAM_z + if((offset_y == 0) && (offset_z == 0)){ + add2output( fullradius - radius * cos (phi) ) + + if (az == 90 || az ==270) + add2output( 0 ) + else + add2output( radius * sin (phi) * cos (theta) ) + + if (az == 0 || az == 180) + add2output( 0 ) + else + add2output( - radius * sin (phi) * sin (theta) ) + } else { +# 
******* THIS SEGMENT OF CODE IS NOT MATHEMATICALLY CORRECT FOR **** +# OFF AXIS ANGLES AND IS SUPPLIED AS A WORKAROUND SINCE IT WILL +# PROBABLY ONLY BE USED ON AXIS. + add2output( defocus ) + add2output( offset_y ) + add2output( offset_z ) + } + + } else { + add2output( "N/A" ) + add2output( "N/A" ) + add2output( "N/A" ) + } +} + +# set TIGHT/LOOSE to N/A if it is not one of the two allowed values +function tight_na() { + if( $nf_tight !~ /TIGHT|LOOSE/ ) { + $nf_tight == "N/A" + } +} + +# this entry is used to give certain entries names +{ + type = $nf_type + item = $nf_item + suite = $nf_suite + order = $nf_order + detector = $nf_detector + grating = $nf_grating + offset_y= $nf_offset_y + offset_z= $nf_offset_z + bnd = $nf_bnd + defocus = $nf_defocus +} + +{ + # make configuration parameter + # as well as setting configuration-dependent N/A values + + if( $nf_bnd ~ "SCAN" ) { + # BND is scanning beam + config = "BND" + hxda_na() + fpsi_na() + } else { + if( grating == "NONE" ) { + config = "HRMA" + } else { + if( grating == "HETG" ) { + if( order != "Both" ) { + $nf_shutter = order substr($nf_shutter, \ + index($nf_shutter, ",") ) + } + } else { + order = "N/A" + } + config = "HRMA/" grating + } + + if( detector ~ /ACIS|HRC/ ) { + detsys = detector + nsub = sub("-", ",", detsys) + config = config "/" detsys + hxda_na() + } else { + config = config "/HXDA" + fpsi_na() + if( detector == "HSI" ) { + hxda_na() + } + } + } + + add2output( config ) + + if( $nf_src ~ /EIPS|Penning/ ) mono_na() + + if( $nf_src == "Penning" ) $nf_voltage = "N/A" + + itm = sprintf("%03d", item) + + if(config in mnemonic) { + if( type in mnemonic ) { + ID = mnemonic[config] "-" mnemonic[type] "-" suite "." 
itm + add2output( ID ) + } else { + error(type, "measurement type not in list") + } + } else { + error(config, "measurement configuration not in list") + } + + # add date to output line + add2output( out_date ) + + # Convert HRMA polar and azimuthal angles to yaw and pitch + polaz2yawpitch($nf_hrma_polar, $nf_hrma_az) + + # set TIGHT/LOOSE to N/A if it is not one of the two allowed values + tight_na() + + # compute number of HXDA apertures + if( config ~ /HXDA/ && $nf_hxda_aplist != "N/A") + add2output( split( $nf_hxda_aplist, dummy, "," ) ) + else + add2output( "N/A" ) + + # make sure the BND value is properly set + if($nf_bnd == "FIXED" && detector ~ /ACIS/) + $nf_bnd =bnd"-SYNC" + else + $nf_bnd = bnd"-FREE" + print +} diff --git a/contrib/awk/test/tweakfld.in b/contrib/awk/test/tweakfld.in new file mode 100644 index 0000000..e27a9dde --- /dev/null +++ b/contrib/awk/test/tweakfld.in @@ -0,0 +1,3 @@ +0.277 N/A N/A 1 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS C-Ka 1.108 0.13484 N/A N/A C8H8 10.32 C8H8 20.64 FIXED 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 44.7175 44.7175 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate +1.486 N/A N/A 2 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Al-Ka 4.458 0.642119 N/A N/A Al 18.38 Al 36.76 FIXED 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate +4.51 N/A N/A 3 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Ti-Ka 22.55 3.02894 N/A N/A Ti 40.6 N/A N/A FIXED 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate diff --git a/contrib/awk/test/tweakfld.ok b/contrib/awk/test/tweakfld.ok new file mode 
100644 index 0000000..3c4d894 --- /dev/null +++ b/contrib/awk/test/tweakfld.ok @@ -0,0 +1,3 @@ +0.277 N/A N/A 1 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS C-Ka 1.108 0.13484 N/A N/A C8H8 10.32 C8H8 20.64 FIXED-FREE 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 44.7175 44.7175 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate HRMA/HRC,I Mar 10, 1996 0 0 0 0 0 N/A +1.486 N/A N/A 2 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Al-Ka 4.458 0.642119 N/A N/A Al 18.38 Al 36.76 FIXED-FREE 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate HRMA/HRC,I Mar 10, 1996 0 0 0 0 0 N/A +4.51 N/A N/A 3 1 ASC/Hank Donnelly N/A NONE ALL,ALL N/A N/A N/A Count Rate Linearity EIPS Ti-Ka 22.55 3.02894 N/A N/A Ti 40.6 N/A N/A FIXED-FREE 1000 NO 0 0 0 HRC,I 1000 N/A N/A N/A N/A N/A N/A N/A N/A N/A 0 N/A APT APT LISSAJOUS 5.55556 5.55556 1 N/A N/A N/A N/A N/A 0 N/A HRCCTRTLIN 0 N/A N/A N/A 10 N/A 180 0 0 N/A N/A FPSI rate HRMA/HRC,I Mar 10, 1996 0 0 0 0 0 N/A diff --git a/contrib/awk/version.c b/contrib/awk/version.c new file mode 100644 index 0000000..e84bb7e --- /dev/null +++ b/contrib/awk/version.c @@ -0,0 +1,50 @@ +char *version_string = "@(#)GNU Awk 3.0"; + +/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead + of the Right Hand Side */ + +/* 1.03 Fixed split() to treat strings of space and tab as FS if + the split char is ' '. + + Added -v option to print version number + + Fixed bug that caused rounding when printing large numbers */ + +/* 2.00beta Incorporated the functionality of the "new" awk as described + the book (reference not handy). Extensively tested, but no + doubt still buggy. Badly needs tuning and cleanup, in + particular in memory management which is currently almost + non-existent. 
*/ + +/* 2.01 JF: Modified to compile under GCC, and fixed a few + bugs while I was at it. I hope I didn't add any more. + I modified parse.y to reduce the number of reduce/reduce + conflicts. There are still a few left. */ + +/* 2.02 Fixed JF's bugs; improved memory management, still needs + lots of work. */ + +/* 2.10 Major grammar rework and lots of bug fixes from David. + Major changes for performance enhancements from David. + A number of minor bug fixes and new features from Arnold. + Changes for MSDOS from Conrad Kwok and Scott Garfinkle. + The gawk.texinfo and info files included! */ + +/* 2.11 Bug fix release to 2.10. Lots of changes for portability, + speed, and configurability. */ + +/* 2.12 Lots of changes for portability, speed, and configurability. + Several bugs fixed. POSIX compliance. Removal of last set + of hard-wired limits. Atari and VMS ports added. */ + +/* 2.13 Public release of 2.12 */ + +/* 2.14 Mostly bug fixes. */ + +/* 2.15 Bug fixes plus intermixing of command-line source and files, + GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. + `delete array'. OS/2 port added. */ + +/* 3.0 RS as regexp, RT variable, FS = "", fflush builtin, posix + regexps, IGNORECASE applies to all comparison, autoconf, source + code cleanup. See the NEWS file. */ -- cgit v1.1